venv on Windows used with py, until the crypt module created a problem: forms with Flask cannot be developed on Windows, grr
parent 7f4a829a79
commit e835830829
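Editor's note: the crypt failure mentioned in the commit message is most likely the common Windows pitfall — `crypt` is a Unix-only standard-library module, and some editors auto-insert `from crypt import methods` while you type a Flask route, which then crashes on Windows with `ModuleNotFoundError`. A minimal sketch of the broken and fixed form (the route below is illustrative, not taken from this repo):

```python
from flask import Flask, request
# from crypt import methods   # bogus editor auto-import; crypt is Unix-only,
#                             # so this single line breaks the app on Windows

app = Flask(__name__)

# `methods` is just a keyword argument of route(); it needs no import at all
@app.route("/submit", methods=["GET", "POST"])
def submit():
    if request.method == "POST":
        return "form received"
    return "show form"
```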
@@ -0,0 +1,405 @@
<#
.Synopsis
Activate a Python virtual environment for the current PowerShell session.

.Description
Pushes the python executable for a virtual environment to the front of the
$Env:PATH environment variable and sets the prompt to signify that you are
in a Python virtual environment. Makes use of the command line switches as
well as the `pyvenv.cfg` file values present in the virtual environment.

.Parameter VenvDir
Path to the directory that contains the virtual environment to activate. The
default value for this is the parent of the directory that the Activate.ps1
script is located within.

.Parameter Prompt
The prompt prefix to display when this virtual environment is activated. By
default, this prompt is the name of the virtual environment folder (VenvDir)
surrounded by parentheses and followed by a single space (ie. '(.venv) ').

.Example
Activate.ps1
Activates the Python virtual environment that contains the Activate.ps1 script.

.Example
Activate.ps1 -Verbose
Activates the Python virtual environment that contains the Activate.ps1 script,
and shows extra information about the activation as it executes.

.Example
Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
Activates the Python virtual environment located in the specified location.

.Example
Activate.ps1 -Prompt "MyPython"
Activates the Python virtual environment that contains the Activate.ps1 script,
and prefixes the current prompt with the specified string (surrounded in
parentheses) while the virtual environment is active.

.Notes
On Windows, it may be required to enable this Activate.ps1 script by setting the
execution policy for the user. You can do this by issuing the following PowerShell
command:

PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser

For more information on Execution Policies:
https://go.microsoft.com/fwlink/?LinkID=135170

#>
Param(
    [Parameter(Mandatory = $false)]
    [String]
    $VenvDir,
    [Parameter(Mandatory = $false)]
    [String]
    $Prompt
)

<# Function declarations --------------------------------------------------- #>

<#
.Synopsis
Remove all shell session elements added by the Activate script, including the
addition of the virtual environment's Python executable from the beginning of
the PATH variable.

.Parameter NonDestructive
If present, do not remove this function from the global namespace for the
session.

#>
function global:deactivate ([switch]$NonDestructive) {
    # Revert to original values

    # The prior prompt:
    if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) {
        Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt
        Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT
    }

    # The prior PYTHONHOME:
    if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) {
        Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME
        Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME
    }

    # The prior PATH:
    if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) {
        Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH
        Remove-Item -Path Env:_OLD_VIRTUAL_PATH
    }

    # Just remove the VIRTUAL_ENV altogether:
    if (Test-Path -Path Env:VIRTUAL_ENV) {
        Remove-Item -Path env:VIRTUAL_ENV
    }

    # Just remove VIRTUAL_ENV_PROMPT altogether.
    if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) {
        Remove-Item -Path env:VIRTUAL_ENV_PROMPT
    }

    # Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether:
    if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) {
        Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
    }

    # Leave deactivate function in the global namespace if requested:
    if (-not $NonDestructive) {
        Remove-Item -Path function:deactivate
    }
}

<#
.Description
Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the
given folder, and returns them in a map.

For each line in the pyvenv.cfg file, if that line can be parsed into exactly
two strings separated by `=` (with any amount of whitespace surrounding the =)
then it is considered a `key = value` line. The left hand string is the key,
the right hand is the value.

If the value starts with a `'` or a `"` then the first and last character is
stripped from the value before being captured.

.Parameter ConfigDir
Path to the directory that contains the `pyvenv.cfg` file.
#>
function Get-PyVenvConfig(
    [String]
    $ConfigDir
) {
    Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"

    # Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue).
    $pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue

    # An empty map will be returned if no config file is found.
    $pyvenvConfig = @{ }

    if ($pyvenvConfigPath) {

        Write-Verbose "File exists, parse `key = value` lines"
        $pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath

        $pyvenvConfigContent | ForEach-Object {
            $keyval = $PSItem -split "\s*=\s*", 2
            if ($keyval[0] -and $keyval[1]) {
                $val = $keyval[1]

                # Remove extraneous quotations around a string value.
                if ("'""".Contains($val.Substring(0, 1))) {
                    $val = $val.Substring(1, $val.Length - 2)
                }

                $pyvenvConfig[$keyval[0]] = $val
                Write-Verbose "Adding Key: '$($keyval[0])'='$val'"
            }
        }
    }
    return $pyvenvConfig
}


<# Begin Activate script --------------------------------------------------- #>

# Determine the containing directory of this script
$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
$VenvExecDir = Get-Item -Path $VenvExecPath

Write-Verbose "Activation script is located in path: '$VenvExecPath'"
Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)'"
Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)'"

# Set values required in priority: CmdLine, ConfigFile, Default
# First, get the location of the virtual environment, it might not be
# VenvExecDir if specified on the command line.
if ($VenvDir) {
    Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
}
else {
    Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
    $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
    Write-Verbose "VenvDir=$VenvDir"
}

# Next, read the `pyvenv.cfg` file to determine any required value such
# as `prompt`.
$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir

# Next, set the prompt from the command line, or the config file, or
# just use the name of the virtual environment folder.
if ($Prompt) {
    Write-Verbose "Prompt specified as argument, using '$Prompt'"
}
else {
    Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
    if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
        Write-Verbose "  Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
        $Prompt = $pyvenvCfg['prompt'];
    }
    else {
        Write-Verbose "  Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)"
        Write-Verbose "  Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
        $Prompt = Split-Path -Path $venvDir -Leaf
    }
}

Write-Verbose "Prompt = '$Prompt'"
Write-Verbose "VenvDir='$VenvDir'"

# Deactivate any currently active virtual environment, but leave the
# deactivate function in place.
deactivate -nondestructive

# Now set the environment variable VIRTUAL_ENV, used by many tools to determine
# that there is an activated venv.
$env:VIRTUAL_ENV = $VenvDir

if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {

    Write-Verbose "Setting prompt to '$Prompt'"

    # Set the prompt to include the env name
    # Make sure _OLD_VIRTUAL_PROMPT is global
    function global:_OLD_VIRTUAL_PROMPT { "" }
    Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
    New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt

    function global:prompt {
        Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
        _OLD_VIRTUAL_PROMPT
    }
    $env:VIRTUAL_ENV_PROMPT = $Prompt
}

# Clear PYTHONHOME
if (Test-Path -Path Env:PYTHONHOME) {
    Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
    Remove-Item -Path Env:PYTHONHOME
}

# Add the venv to the PATH
Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
$Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"

# SIG # Begin signature block
# MIIc+QYJKoZIhvcNAQcCoIIc6jCCHOYCAQExDzANBglghkgBZQMEAgEFADB5Bgor
# BgEEAYI3AgEEoGswaTA0BgorBgEEAYI3AgEeMCYCAwEAAAQQH8w7YFlLCE63JNLG
# KX7zUQIBAAIBAAIBAAIBAAIBADAxMA0GCWCGSAFlAwQCAQUABCB/jbdIOBl7aFn0
# IOwX0LZ7IuNFjwXgmb5mWup4AsyxRaCCC38wggUwMIIEGKADAgECAhAECRgbX9W7
# ZnVTQ7VvlVAIMA0GCSqGSIb3DQEBCwUAMGUxCzAJBgNVBAYTAlVTMRUwEwYDVQQK
# EwxEaWdpQ2VydCBJbmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5jb20xJDAiBgNV
# BAMTG0RpZ2lDZXJ0IEFzc3VyZWQgSUQgUm9vdCBDQTAeFw0xMzEwMjIxMjAwMDBa
# Fw0yODEwMjIxMjAwMDBaMHIxCzAJBgNVBAYTAlVTMRUwEwYDVQQKEwxEaWdpQ2Vy
# dCBJbmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5jb20xMTAvBgNVBAMTKERpZ2lD
# ZXJ0IFNIQTIgQXNzdXJlZCBJRCBDb2RlIFNpZ25pbmcgQ0EwggEiMA0GCSqGSIb3
# DQEBAQUAA4IBDwAwggEKAoIBAQD407Mcfw4Rr2d3B9MLMUkZz9D7RZmxOttE9X/l
# qJ3bMtdx6nadBS63j/qSQ8Cl+YnUNxnXtqrwnIal2CWsDnkoOn7p0WfTxvspJ8fT
# eyOU5JEjlpB3gvmhhCNmElQzUHSxKCa7JGnCwlLyFGeKiUXULaGj6YgsIJWuHEqH
# CN8M9eJNYBi+qsSyrnAxZjNxPqxwoqvOf+l8y5Kh5TsxHM/q8grkV7tKtel05iv+
# bMt+dDk2DZDv5LVOpKnqagqrhPOsZ061xPeM0SAlI+sIZD5SlsHyDxL0xY4PwaLo
# LFH3c7y9hbFig3NBggfkOItqcyDQD2RzPJ6fpjOp/RnfJZPRAgMBAAGjggHNMIIB
# yTASBgNVHRMBAf8ECDAGAQH/AgEAMA4GA1UdDwEB/wQEAwIBhjATBgNVHSUEDDAK
# BggrBgEFBQcDAzB5BggrBgEFBQcBAQRtMGswJAYIKwYBBQUHMAGGGGh0dHA6Ly9v
# Y3NwLmRpZ2ljZXJ0LmNvbTBDBggrBgEFBQcwAoY3aHR0cDovL2NhY2VydHMuZGln
# aWNlcnQuY29tL0RpZ2lDZXJ0QXNzdXJlZElEUm9vdENBLmNydDCBgQYDVR0fBHow
# eDA6oDigNoY0aHR0cDovL2NybDQuZGlnaWNlcnQuY29tL0RpZ2lDZXJ0QXNzdXJl
# ZElEUm9vdENBLmNybDA6oDigNoY0aHR0cDovL2NybDMuZGlnaWNlcnQuY29tL0Rp
# Z2lDZXJ0QXNzdXJlZElEUm9vdENBLmNybDBPBgNVHSAESDBGMDgGCmCGSAGG/WwA
# AgQwKjAoBggrBgEFBQcCARYcaHR0cHM6Ly93d3cuZGlnaWNlcnQuY29tL0NQUzAK
# BghghkgBhv1sAzAdBgNVHQ4EFgQUWsS5eyoKo6XqcQPAYPkt9mV1DlgwHwYDVR0j
# BBgwFoAUReuir/SSy4IxLVGLp6chnfNtyA8wDQYJKoZIhvcNAQELBQADggEBAD7s
# DVoks/Mi0RXILHwlKXaoHV0cLToaxO8wYdd+C2D9wz0PxK+L/e8q3yBVN7Dh9tGS
# dQ9RtG6ljlriXiSBThCk7j9xjmMOE0ut119EefM2FAaK95xGTlz/kLEbBw6RFfu6
# r7VRwo0kriTGxycqoSkoGjpxKAI8LpGjwCUR4pwUR6F6aGivm6dcIFzZcbEMj7uo
# +MUSaJ/PQMtARKUT8OZkDCUIQjKyNookAv4vcn4c10lFluhZHen6dGRrsutmQ9qz
# sIzV6Q3d9gEgzpkxYz0IGhizgZtPxpMQBvwHgfqL2vmCSfdibqFT+hKUGIUukpHq
# aGxEMrJmoecYpJpkUe8wggZHMIIFL6ADAgECAhADPtXtoGXRuMkd/PkqbJvYMA0G
# CSqGSIb3DQEBCwUAMHIxCzAJBgNVBAYTAlVTMRUwEwYDVQQKEwxEaWdpQ2VydCBJ
# bmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5jb20xMTAvBgNVBAMTKERpZ2lDZXJ0
# IFNIQTIgQXNzdXJlZCBJRCBDb2RlIFNpZ25pbmcgQ0EwHhcNMTgxMjE4MDAwMDAw
# WhcNMjExMjIyMTIwMDAwWjCBgzELMAkGA1UEBhMCVVMxFjAUBgNVBAgTDU5ldyBI
# YW1wc2hpcmUxEjAQBgNVBAcTCVdvbGZlYm9ybzEjMCEGA1UEChMaUHl0aG9uIFNv
# ZnR3YXJlIEZvdW5kYXRpb24xIzAhBgNVBAMTGlB5dGhvbiBTb2Z0d2FyZSBGb3Vu
# ZGF0aW9uMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAqr2kS7J1uW7o
# JRxlsdrETAjKarfoH5TI8PWST6Yb2xPooP7vHT4iaVXyL5Lze1f53Jw67Sp+u524
# fJXf30qHViEWxumy2RWG0nciU2d+mMqzjlaAWSZNF0u4RcvyDJokEV0RUOqI5CG5
# zPI3W9uQ6LiUk3HCYW6kpH177A5T3pw/Po8O8KErJGn1anaqtIICq99ySxrMad/2
# hPMBRf6Ndah7f7HPn1gkSSTAoejyuqF5h+B0qI4+JK5+VLvz659VTbAWJsYakkxZ
# xVWYpFv4KeQSSwoo0DzMvmERsTzNvVBMWhu9OriJNg+QfFmf96zVTu93cZ+r7xMp
# bXyfIOGKhHMaRuZ8ihuWIx3gI9WHDFX6fBKR8+HlhdkaiBEWIsXRoy+EQUyK7zUs
# +FqOo2sRYttbs8MTF9YDKFZwyPjn9Wn+gLGd5NUEVyNvD9QVGBEtN7vx87bduJUB
# 8F4DylEsMtZTfjw/au6AmOnmneK5UcqSJuwRyZaGNk7y3qj06utx+HTTqHgi975U
# pxfyrwAqkovoZEWBVSpvku8PVhkBXcLmNe6MEHlFiaMoiADAeKmX5RFRkN+VrmYG
# Tg4zajxfdHeIY8TvLf48tTfmnQJd98geJQv/01NUy/FxuwqAuTkaez5Nl1LxP0Cp
# THhghzO4FRD4itT2wqTh4jpojw9QZnsCAwEAAaOCAcUwggHBMB8GA1UdIwQYMBaA
# FFrEuXsqCqOl6nEDwGD5LfZldQ5YMB0GA1UdDgQWBBT8Kr9+1L6s84KcpM97IgE7
# uI8H8jAOBgNVHQ8BAf8EBAMCB4AwEwYDVR0lBAwwCgYIKwYBBQUHAwMwdwYDVR0f
# BHAwbjA1oDOgMYYvaHR0cDovL2NybDMuZGlnaWNlcnQuY29tL3NoYTItYXNzdXJl
# ZC1jcy1nMS5jcmwwNaAzoDGGL2h0dHA6Ly9jcmw0LmRpZ2ljZXJ0LmNvbS9zaGEy
# LWFzc3VyZWQtY3MtZzEuY3JsMEwGA1UdIARFMEMwNwYJYIZIAYb9bAMBMCowKAYI
# KwYBBQUHAgEWHGh0dHBzOi8vd3d3LmRpZ2ljZXJ0LmNvbS9DUFMwCAYGZ4EMAQQB
# MIGEBggrBgEFBQcBAQR4MHYwJAYIKwYBBQUHMAGGGGh0dHA6Ly9vY3NwLmRpZ2lj
# ZXJ0LmNvbTBOBggrBgEFBQcwAoZCaHR0cDovL2NhY2VydHMuZGlnaWNlcnQuY29t
# L0RpZ2lDZXJ0U0hBMkFzc3VyZWRJRENvZGVTaWduaW5nQ0EuY3J0MAwGA1UdEwEB
# /wQCMAAwDQYJKoZIhvcNAQELBQADggEBAEt1oS21X0axiafPjyY+vlYqjWKuUu/Y
# FuYWIEq6iRRaFabNDhj9RBFQF/aJiE5msrQEOfAD6/6gVSH91lZWBqg6NEeG9T9S
# XbiAPvJ9CEWFsdkXUrjbWhvCnuZ7kqUuU5BAumI1QRbpYgZL3UA+iZXkmjbGh1ln
# 8rUhWIxbBYL4Sg2nqpB44p7CUFYkPj/MbwU2gvBV2pXjj5WaskoZtsACMv5g42BN
# oVLoRAi+ev6s07POt+JtHRIm87lTyuc8wh0swTPUwksKbLU1Zdj9CpqtzXnuVE0w
# 50exJvRSK3Vt4g+0vigpI3qPmDdpkf9+4Mvy0XMNcqrthw20R+PkIlMxghDQMIIQ
# zAIBATCBhjByMQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkw
# FwYDVQQLExB3d3cuZGlnaWNlcnQuY29tMTEwLwYDVQQDEyhEaWdpQ2VydCBTSEEy
# IEFzc3VyZWQgSUQgQ29kZSBTaWduaW5nIENBAhADPtXtoGXRuMkd/PkqbJvYMA0G
# CWCGSAFlAwQCAQUAoIGaMBkGCSqGSIb3DQEJAzEMBgorBgEEAYI3AgEEMBwGCisG
# AQQBgjcCAQsxDjAMBgorBgEEAYI3AgEVMC4GCisGAQQBgjcCAQwxIDAeoByAGgBQ
# AHkAdABoAG8AbgAgADMALgAxADAALgAwMC8GCSqGSIb3DQEJBDEiBCBSbvNxuLmL
# Fyf70+vzWnE86JSS2JLTJbh9WhRqgW9MeDANBgkqhkiG9w0BAQEFAASCAgCAiz/E
# icJSsvY4x2vZnY3UjThg0S9qs+r104rhPxj39k4Qw7szI4yjQQbb2bvnoJA3LoFo
# RROlFg9dXJ8YWRZRNkla+zHX7bIsWO2aIFCnOCOUFH19ttlrTvnal6uLp7P9+wQs
# rller1aRyNIM47dYn+CGxD4NEJ/NIqhCjuRKl0v1Dkps/2md0YkoEcnRXNb3vq6x
# +2L9x3zAcBmVwVM4SFFZ2ZZQG/aHgihbVoNxxTXhYDcwaL5uRrNelz9KXDn+GYpk
# K7ZUFbmNfgnhieLHqF0hk7wLZtHI1BSmsOAFrkApcuIVLzD8aSgDbAMVZEv3GkTx
# Img7jKusLIITGuUWT8wO1LDUXT54qBkQhue6kJ3rqSa2agtg/OWxtQ9JwGSOiRaW
# wlRJjsihrw8Nx1Kcr5EwruBBLFiF+mv/C5ikLvwES1ZKoLccqCftuEptcbmsyEZS
# ov39SslaIWvqfy7rfz+KFfP9WHJxobV6DY4essDCMNcoYXkRwhbT+rr0ydDH23DS
# 3hbXpCuKsy5IAMB7Xk8/uuXV2The/qKmkkmu0KuFOu2/3oqVOC4a27IjkvBCSRhp
# /yWQSM/JQk+KwQ31XCVHeGWf7kqMgCXwkZfkw/lvusXzMuWZqT6bfZ0eGjqX/6jC
# kNwr4fCZtxx0cFLzmCr6/yClCYoDCfGoc1I+D6GCDX0wgg15BgorBgEEAYI3AwMB
# MYINaTCCDWUGCSqGSIb3DQEHAqCCDVYwgg1SAgEDMQ8wDQYJYIZIAWUDBAIBBQAw
# dwYLKoZIhvcNAQkQAQSgaARmMGQCAQEGCWCGSAGG/WwHATAxMA0GCWCGSAFlAwQC
# AQUABCBEd30afcCyVMH4hw1ZZPb4JotijhQZtXQ42klvgjTVGwIQDDJTIO6lXwNY
# 7qTonYA8LxgPMjAyMTEwMDQxOTExMzFaoIIKNzCCBP4wggPmoAMCAQICEA1CSuC+
# Ooj/YEAhzhQA8N0wDQYJKoZIhvcNAQELBQAwcjELMAkGA1UEBhMCVVMxFTATBgNV
# BAoTDERpZ2lDZXJ0IEluYzEZMBcGA1UECxMQd3d3LmRpZ2ljZXJ0LmNvbTExMC8G
# A1UEAxMoRGlnaUNlcnQgU0hBMiBBc3N1cmVkIElEIFRpbWVzdGFtcGluZyBDQTAe
# Fw0yMTAxMDEwMDAwMDBaFw0zMTAxMDYwMDAwMDBaMEgxCzAJBgNVBAYTAlVTMRcw
# FQYDVQQKEw5EaWdpQ2VydCwgSW5jLjEgMB4GA1UEAxMXRGlnaUNlcnQgVGltZXN0
# YW1wIDIwMjEwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDC5mGEZ8WK
# 9Q0IpEXKY2tR1zoRQr0KdXVNlLQMULUmEP4dyG+RawyW5xpcSO9E5b+bYc0VkWJa
# uP9nC5xj/TZqgfop+N0rcIXeAhjzeG28ffnHbQk9vmp2h+mKvfiEXR52yeTGdnY6
# U9HR01o2j8aj4S8bOrdh1nPsTm0zinxdRS1LsVDmQTo3VobckyON91Al6GTm3dOP
# L1e1hyDrDo4s1SPa9E14RuMDgzEpSlwMMYpKjIjF9zBa+RSvFV9sQ0kJ/SYjU/aN
# Y+gaq1uxHTDCm2mCtNv8VlS8H6GHq756WwogL0sJyZWnjbL61mOLTqVyHO6fegFz
# +BnW/g1JhL0BAgMBAAGjggG4MIIBtDAOBgNVHQ8BAf8EBAMCB4AwDAYDVR0TAQH/
# BAIwADAWBgNVHSUBAf8EDDAKBggrBgEFBQcDCDBBBgNVHSAEOjA4MDYGCWCGSAGG
# /WwHATApMCcGCCsGAQUFBwIBFhtodHRwOi8vd3d3LmRpZ2ljZXJ0LmNvbS9DUFMw
# HwYDVR0jBBgwFoAU9LbhIB3+Ka7S5GGlsqIlssgXNW4wHQYDVR0OBBYEFDZEho6k
# urBmvrwoLR1ENt3janq8MHEGA1UdHwRqMGgwMqAwoC6GLGh0dHA6Ly9jcmwzLmRp
# Z2ljZXJ0LmNvbS9zaGEyLWFzc3VyZWQtdHMuY3JsMDKgMKAuhixodHRwOi8vY3Js
# NC5kaWdpY2VydC5jb20vc2hhMi1hc3N1cmVkLXRzLmNybDCBhQYIKwYBBQUHAQEE
# eTB3MCQGCCsGAQUFBzABhhhodHRwOi8vb2NzcC5kaWdpY2VydC5jb20wTwYIKwYB
# BQUHMAKGQ2h0dHA6Ly9jYWNlcnRzLmRpZ2ljZXJ0LmNvbS9EaWdpQ2VydFNIQTJB
# c3N1cmVkSURUaW1lc3RhbXBpbmdDQS5jcnQwDQYJKoZIhvcNAQELBQADggEBAEgc
# 3LXpmiO85xrnIA6OZ0b9QnJRdAojR6OrktIlxHBZvhSg5SeBpU0UFRkHefDRBMOG
# 2Tu9/kQCZk3taaQP9rhwz2Lo9VFKeHk2eie38+dSn5On7UOee+e03UEiifuHokYD
# Tvz0/rdkd2NfI1Jpg4L6GlPtkMyNoRdzDfTzZTlwS/Oc1np72gy8PTLQG8v1Yfx1
# CAB2vIEO+MDhXM/EEXLnG2RJ2CKadRVC9S0yOIHa9GCiurRS+1zgYSQlT7LfySmo
# c0NR2r1j1h9bm/cuG08THfdKDXF+l7f0P4TrweOjSaH6zqe/Vs+6WXZhiV9+p7SO
# Z3j5NpjhyyjaW4emii8wggUxMIIEGaADAgECAhAKoSXW1jIbfkHkBdo2l8IVMA0G
# CSqGSIb3DQEBCwUAMGUxCzAJBgNVBAYTAlVTMRUwEwYDVQQKEwxEaWdpQ2VydCBJ
# bmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5jb20xJDAiBgNVBAMTG0RpZ2lDZXJ0
# IEFzc3VyZWQgSUQgUm9vdCBDQTAeFw0xNjAxMDcxMjAwMDBaFw0zMTAxMDcxMjAw
# MDBaMHIxCzAJBgNVBAYTAlVTMRUwEwYDVQQKEwxEaWdpQ2VydCBJbmMxGTAXBgNV
# BAsTEHd3dy5kaWdpY2VydC5jb20xMTAvBgNVBAMTKERpZ2lDZXJ0IFNIQTIgQXNz
# dXJlZCBJRCBUaW1lc3RhbXBpbmcgQ0EwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAw
# ggEKAoIBAQC90DLuS82Pf92puoKZxTlUKFe2I0rEDgdFM1EQfdD5fU1ofue2oPSN
# s4jkl79jIZCYvxO8V9PD4X4I1moUADj3Lh477sym9jJZ/l9lP+Cb6+NGRwYaVX4L
# J37AovWg4N4iPw7/fpX786O6Ij4YrBHk8JkDbTuFfAnT7l3ImgtU46gJcWvgzyIQ
# D3XPcXJOCq3fQDpct1HhoXkUxk0kIzBdvOw8YGqsLwfM/fDqR9mIUF79Zm5WYScp
# iYRR5oLnRlD9lCosp+R1PrqYD4R/nzEU1q3V8mTLex4F0IQZchfxFwbvPc3WTe8G
# Qv2iUypPhR3EHTyvz9qsEPXdrKzpVv+TAgMBAAGjggHOMIIByjAdBgNVHQ4EFgQU
# 9LbhIB3+Ka7S5GGlsqIlssgXNW4wHwYDVR0jBBgwFoAUReuir/SSy4IxLVGLp6ch
# nfNtyA8wEgYDVR0TAQH/BAgwBgEB/wIBADAOBgNVHQ8BAf8EBAMCAYYwEwYDVR0l
# BAwwCgYIKwYBBQUHAwgweQYIKwYBBQUHAQEEbTBrMCQGCCsGAQUFBzABhhhodHRw
# Oi8vb2NzcC5kaWdpY2VydC5jb20wQwYIKwYBBQUHMAKGN2h0dHA6Ly9jYWNlcnRz
# LmRpZ2ljZXJ0LmNvbS9EaWdpQ2VydEFzc3VyZWRJRFJvb3RDQS5jcnQwgYEGA1Ud
# HwR6MHgwOqA4oDaGNGh0dHA6Ly9jcmw0LmRpZ2ljZXJ0LmNvbS9EaWdpQ2VydEFz
# c3VyZWRJRFJvb3RDQS5jcmwwOqA4oDaGNGh0dHA6Ly9jcmwzLmRpZ2ljZXJ0LmNv
# bS9EaWdpQ2VydEFzc3VyZWRJRFJvb3RDQS5jcmwwUAYDVR0gBEkwRzA4BgpghkgB
# hv1sAAIEMCowKAYIKwYBBQUHAgEWHGh0dHBzOi8vd3d3LmRpZ2ljZXJ0LmNvbS9D
# UFMwCwYJYIZIAYb9bAcBMA0GCSqGSIb3DQEBCwUAA4IBAQBxlRLpUYdWac3v3dp8
# qmN6s3jPBjdAhO9LhL/KzwMC/cWnww4gQiyvd/MrHwwhWiq3BTQdaq6Z+CeiZr8J
# qmDfdqQ6kw/4stHYfBli6F6CJR7Euhx7LCHi1lssFDVDBGiy23UC4HLHmNY8ZOUf
# SBAYX4k4YU1iRiSHY4yRUiyvKYnleB/WCxSlgNcSR3CzddWThZN+tpJn+1Nhiaj1
# a5bA9FhpDXzIAbG5KHW3mWOFIoxhynmUfln8jA/jb7UBJrZspe6HUSHkWGCbugwt
# K22ixH67xCUrRwIIfEmuE7bhfEJCKMYYVs9BNLZmXbZ0e/VWMyIvIjayS6JKldj1
# po5SMYIChjCCAoICAQEwgYYwcjELMAkGA1UEBhMCVVMxFTATBgNVBAoTDERpZ2lD
# ZXJ0IEluYzEZMBcGA1UECxMQd3d3LmRpZ2ljZXJ0LmNvbTExMC8GA1UEAxMoRGln
# aUNlcnQgU0hBMiBBc3N1cmVkIElEIFRpbWVzdGFtcGluZyBDQQIQDUJK4L46iP9g
# QCHOFADw3TANBglghkgBZQMEAgEFAKCB0TAaBgkqhkiG9w0BCQMxDQYLKoZIhvcN
# AQkQAQQwHAYJKoZIhvcNAQkFMQ8XDTIxMTAwNDE5MTEzMVowKwYLKoZIhvcNAQkQ
# AgwxHDAaMBgwFgQU4deCqOGRvu9ryhaRtaq0lKYkm/MwLwYJKoZIhvcNAQkEMSIE
# ILvICiD0Bu7OdD0pC2wAqLO9UGMzUOfGejuSENvWkuXdMDcGCyqGSIb3DQEJEAIv
# MSgwJjAkMCIEILMQkAa8CtmDB5FXKeBEA0Fcg+MpK2FPJpZMjTVx7PWpMA0GCSqG
# SIb3DQEBAQUABIIBAIaCoJWKRd6cHB/KrrlVmBY469068xG7ok+T18bfcLmNrvPF
# 7PGY5a4qcMZj+rBevyfWTrOreNAWyNhnxIT0qYneSTJOMytTPYnJI+GhvGwQjDhC
# Eg/JeLOe9guMq7P/ZNvFur+VoCz6sgR/Q+9IGUhJ/7liABdMwNLK38r5VEaSAnSW
# RetjuSqtMoZc2KtjL/MUY26sUwjsMD0tgt0EOF4nrcv3rWl++TsJUEqYr+aFpNu4
# eVaTNeS0V7sRGQbWAQohkES879Lpqv7KaEW+h426+cc5el260gynz7vTzUuaamvW
# Nfbvu83P5Tk1nRA1Ds2aSqn/RMu6cNNjD8ntV5o=
# SIG # End signature block
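Editor's note: for comparison, a minimal Python sketch of the same `pyvenv.cfg` parsing rules that `Get-PyVenvConfig` above implements (split each line on the first `=`, trim whitespace, strip one pair of surrounding quotes). The function name is mine, not part of the venv module:

```python
from pathlib import Path

def read_pyvenv_cfg(venv_dir: str) -> dict[str, str]:
    """Parse pyvenv.cfg using the rules described in Get-PyVenvConfig."""
    config: dict[str, str] = {}
    path = Path(venv_dir) / "pyvenv.cfg"
    if not path.exists():  # mirror the script: missing file -> empty map
        return config
    for line in path.read_text().splitlines():
        key, sep, value = line.partition("=")
        key, value = key.strip(), value.strip()
        if not (sep and key and value):
            continue
        if value[0] in "'\"":
            value = value[1:-1]  # strip one pair of surrounding quotes
        config[key] = value
    return config

print(read_pyvenv_cfg(".library"))  # '.library' is this repo's venv folder
```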
@@ -0,0 +1,69 @@
# This file must be used with "source bin/activate" *from bash*
# you cannot run it directly

deactivate () {
    # reset old environment variables
    if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
        PATH="${_OLD_VIRTUAL_PATH:-}"
        export PATH
        unset _OLD_VIRTUAL_PATH
    fi
    if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
        PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
        export PYTHONHOME
        unset _OLD_VIRTUAL_PYTHONHOME
    fi

    # This should detect bash and zsh, which have a hash command that must
    # be called to get it to forget past commands.  Without forgetting
    # past commands the $PATH changes we made may not be respected
    if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then
        hash -r 2> /dev/null
    fi

    if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
        PS1="${_OLD_VIRTUAL_PS1:-}"
        export PS1
        unset _OLD_VIRTUAL_PS1
    fi

    unset VIRTUAL_ENV
    unset VIRTUAL_ENV_PROMPT
    if [ ! "${1:-}" = "nondestructive" ] ; then
        # Self destruct!
        unset -f deactivate
    fi
}

# unset irrelevant variables
deactivate nondestructive

VIRTUAL_ENV="C:\Users\erica\soupboat\library-db\.library"
export VIRTUAL_ENV

_OLD_VIRTUAL_PATH="$PATH"
PATH="$VIRTUAL_ENV/Scripts:$PATH"
export PATH

# unset PYTHONHOME if set
# this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
# could use `if (set -u; : $PYTHONHOME) ;` in bash
if [ -n "${PYTHONHOME:-}" ] ; then
    _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
    unset PYTHONHOME
fi

if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
    _OLD_VIRTUAL_PS1="${PS1:-}"
    PS1="(.library) ${PS1:-}"
    export PS1
    VIRTUAL_ENV_PROMPT="(.library) "
    export VIRTUAL_ENV_PROMPT
fi

# This should detect bash and zsh, which have a hash command that must
# be called to get it to forget past commands.  Without forgetting
# past commands the $PATH changes we made may not be respected
if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then
    hash -r 2> /dev/null
fi
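Editor's note: the activation scripts in this commit all do the same two things — export `VIRTUAL_ENV` and prepend the environment's `Scripts` directory to `PATH`. From Python you can check both effects; this is a generic sketch, not part of the venv files:

```python
import os
import sys

# set by activate / activate.bat / Activate.ps1
print("VIRTUAL_ENV =", os.environ.get("VIRTUAL_ENV"))

# true inside any venv interpreter, activated or not: the venv reports
# itself as sys.prefix while sys.base_prefix stays the base install
print("in a venv:", sys.prefix != sys.base_prefix)
```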
@@ -0,0 +1,34 @@
@echo off

rem This file is UTF-8 encoded, so we need to update the current code page while executing it
for /f "tokens=2 delims=:." %%a in ('"%SystemRoot%\System32\chcp.com"') do (
    set _OLD_CODEPAGE=%%a
)
if defined _OLD_CODEPAGE (
    "%SystemRoot%\System32\chcp.com" 65001 > nul
)

set VIRTUAL_ENV=C:\Users\erica\soupboat\library-db\.library

if not defined PROMPT set PROMPT=$P$G

if defined _OLD_VIRTUAL_PROMPT set PROMPT=%_OLD_VIRTUAL_PROMPT%
if defined _OLD_VIRTUAL_PYTHONHOME set PYTHONHOME=%_OLD_VIRTUAL_PYTHONHOME%

set _OLD_VIRTUAL_PROMPT=%PROMPT%
set PROMPT=(.library) %PROMPT%

if defined PYTHONHOME set _OLD_VIRTUAL_PYTHONHOME=%PYTHONHOME%
set PYTHONHOME=

if defined _OLD_VIRTUAL_PATH set PATH=%_OLD_VIRTUAL_PATH%
if not defined _OLD_VIRTUAL_PATH set _OLD_VIRTUAL_PATH=%PATH%

set PATH=%VIRTUAL_ENV%\Scripts;%PATH%
set VIRTUAL_ENV_PROMPT=(.library) 

:END
if defined _OLD_CODEPAGE (
    "%SystemRoot%\System32\chcp.com" %_OLD_CODEPAGE% > nul
    set _OLD_CODEPAGE=
)
Binary file not shown.
@@ -0,0 +1,22 @@
@echo off

if defined _OLD_VIRTUAL_PROMPT (
    set "PROMPT=%_OLD_VIRTUAL_PROMPT%"
)
set _OLD_VIRTUAL_PROMPT=

if defined _OLD_VIRTUAL_PYTHONHOME (
    set "PYTHONHOME=%_OLD_VIRTUAL_PYTHONHOME%"
    set _OLD_VIRTUAL_PYTHONHOME=
)

if defined _OLD_VIRTUAL_PATH (
    set "PATH=%_OLD_VIRTUAL_PATH%"
)

set _OLD_VIRTUAL_PATH=

set VIRTUAL_ENV=
set VIRTUAL_ENV_PROMPT=

:END
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1 @@
pip
@@ -0,0 +1,28 @@
Copyright 2010 Pallets

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

1.  Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.

2.  Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

3.  Neither the name of the copyright holder nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,123 @@
Metadata-Version: 2.1
Name: Flask
Version: 2.2.2
Summary: A simple framework for building complex web applications.
Home-page: https://palletsprojects.com/p/flask
Author: Armin Ronacher
Author-email: armin.ronacher@active-4.com
Maintainer: Pallets
Maintainer-email: contact@palletsprojects.com
License: BSD-3-Clause
Project-URL: Donate, https://palletsprojects.com/donate
Project-URL: Documentation, https://flask.palletsprojects.com/
Project-URL: Changes, https://flask.palletsprojects.com/changes/
Project-URL: Source Code, https://github.com/pallets/flask/
Project-URL: Issue Tracker, https://github.com/pallets/flask/issues/
Project-URL: Twitter, https://twitter.com/PalletsTeam
Project-URL: Chat, https://discord.gg/pallets
Classifier: Development Status :: 5 - Production/Stable
Classifier: Environment :: Web Environment
Classifier: Framework :: Flask
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: BSD License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content
Classifier: Topic :: Internet :: WWW/HTTP :: WSGI
Classifier: Topic :: Internet :: WWW/HTTP :: WSGI :: Application
Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
Requires-Python: >=3.7
Description-Content-Type: text/x-rst
License-File: LICENSE.rst
Requires-Dist: Werkzeug (>=2.2.2)
Requires-Dist: Jinja2 (>=3.0)
Requires-Dist: itsdangerous (>=2.0)
Requires-Dist: click (>=8.0)
Requires-Dist: importlib-metadata (>=3.6.0) ; python_version < "3.10"
Provides-Extra: async
Requires-Dist: asgiref (>=3.2) ; extra == 'async'
Provides-Extra: dotenv
Requires-Dist: python-dotenv ; extra == 'dotenv'

Flask
=====

Flask is a lightweight `WSGI`_ web application framework. It is designed
to make getting started quick and easy, with the ability to scale up to
complex applications. It began as a simple wrapper around `Werkzeug`_
and `Jinja`_ and has become one of the most popular Python web
application frameworks.

Flask offers suggestions, but doesn't enforce any dependencies or
project layout. It is up to the developer to choose the tools and
libraries they want to use. There are many extensions provided by the
community that make adding new functionality easy.

.. _WSGI: https://wsgi.readthedocs.io/
.. _Werkzeug: https://werkzeug.palletsprojects.com/
.. _Jinja: https://jinja.palletsprojects.com/


Installing
----------

Install and update using `pip`_:

.. code-block:: text

    $ pip install -U Flask

.. _pip: https://pip.pypa.io/en/stable/getting-started/


A Simple Example
----------------

.. code-block:: python

    # save this as app.py
    from flask import Flask

    app = Flask(__name__)

    @app.route("/")
    def hello():
        return "Hello, World!"

.. code-block:: text

    $ flask run
      * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)


Contributing
------------

For guidance on setting up a development environment and how to make a
contribution to Flask, see the `contributing guidelines`_.

.. _contributing guidelines: https://github.com/pallets/flask/blob/main/CONTRIBUTING.rst


Donate
------

The Pallets organization develops and supports Flask and the libraries
it uses. In order to grow the community of contributors and users, and
allow the maintainers to devote more time to the projects, `please
donate today`_.

.. _please donate today: https://palletsprojects.com/donate


Links
-----

- Documentation: https://flask.palletsprojects.com/
- Changes: https://flask.palletsprojects.com/changes/
- PyPI Releases: https://pypi.org/project/Flask/
- Source Code: https://github.com/pallets/flask/
- Issue Tracker: https://github.com/pallets/flask/issues/
- Website: https://palletsprojects.com/p/flask/
- Twitter: https://twitter.com/PalletsTeam
- Chat: https://discord.gg/pallets
@@ -0,0 +1,54 @@
../../Scripts/flask.exe,sha256=NQgP9NSC1TefjNGpqFo9v9Q8F8b8I33AJjZwu-6QUyI,107896
Flask-2.2.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
Flask-2.2.2.dist-info/LICENSE.rst,sha256=SJqOEQhQntmKN7uYPhHg9-HTHwvY-Zp5yESOf_N9B-o,1475
Flask-2.2.2.dist-info/METADATA,sha256=UXiwRLD1johd_tGlYOlOKXkJFIG82ehR3bxqh4XWFwA,3889
Flask-2.2.2.dist-info/RECORD,,
Flask-2.2.2.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
Flask-2.2.2.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
Flask-2.2.2.dist-info/entry_points.txt,sha256=s3MqQpduU25y4dq3ftBYD6bMVdVnbMpZP-sUNw0zw0k,41
Flask-2.2.2.dist-info/top_level.txt,sha256=dvi65F6AeGWVU0TBpYiC04yM60-FX1gJFkK31IKQr5c,6
flask/__init__.py,sha256=Y4mEWqAMxj_Oxq9eYv3tWyN-0nU9yVKBGK_t6BxqvvM,2890
flask/__main__.py,sha256=bYt9eEaoRQWdejEHFD8REx9jxVEdZptECFsV7F49Ink,30
flask/__pycache__/__init__.cpython-310.pyc,,
flask/__pycache__/__main__.cpython-310.pyc,,
flask/__pycache__/app.cpython-310.pyc,,
flask/__pycache__/blueprints.cpython-310.pyc,,
flask/__pycache__/cli.cpython-310.pyc,,
flask/__pycache__/config.cpython-310.pyc,,
flask/__pycache__/ctx.cpython-310.pyc,,
flask/__pycache__/debughelpers.cpython-310.pyc,,
flask/__pycache__/globals.cpython-310.pyc,,
flask/__pycache__/helpers.cpython-310.pyc,,
flask/__pycache__/logging.cpython-310.pyc,,
flask/__pycache__/scaffold.cpython-310.pyc,,
flask/__pycache__/sessions.cpython-310.pyc,,
flask/__pycache__/signals.cpython-310.pyc,,
flask/__pycache__/templating.cpython-310.pyc,,
flask/__pycache__/testing.cpython-310.pyc,,
flask/__pycache__/typing.cpython-310.pyc,,
flask/__pycache__/views.cpython-310.pyc,,
flask/__pycache__/wrappers.cpython-310.pyc,,
flask/app.py,sha256=VfBcGmEVveMcSajkUmDXCEOvAd-2mIBJ355KicvQ4gE,99025
flask/blueprints.py,sha256=Jbrt-2jlLiFklC3De9EWBioPtDjHYYbXlTDK9Z7L2nk,26936
flask/cli.py,sha256=foLlD8NiIXcxpxMmRQvvlZPbVM8pxOaJG3sa58c9dAA,33486
flask/config.py,sha256=IWqHecH4poDxNEUg4U_ZA1CTlL5BKZDX3ofG4UGYyi0,12584
flask/ctx.py,sha256=ZOGEWuFjsCIk3vm-C9pLME0e4saeBkeGpr2tTSvemSM,14851
flask/debughelpers.py,sha256=_RvAL3TW5lqMJeCVWtTU6rSDJC7jnRaBL6OEkVmooyU,5511
flask/globals.py,sha256=1DLZMi8Su-S1gf8zEiR3JPi6VXYIrYqm8C9__Ly66ss,3187
flask/helpers.py,sha256=ELq27745jihrdyAP9qY8KENlCVDdnWRWTIn35L9a-UU,25334
flask/json/__init__.py,sha256=TOwldHT3_kFaXHlORKi9yCWt7dbPNB0ovdHHQWlSRzY,11175
flask/json/__pycache__/__init__.cpython-310.pyc,,
flask/json/__pycache__/provider.cpython-310.pyc,,
flask/json/__pycache__/tag.cpython-310.pyc,,
flask/json/provider.py,sha256=jXCNypf11PN4ngQjEt6LnSdCWQ1yHIAkNLHlXQlCB-A,10674
flask/json/tag.py,sha256=fys3HBLssWHuMAIJuTcf2K0bCtosePBKXIWASZEEjnU,8857
flask/logging.py,sha256=WYng0bLTRS_CJrocGcCLJpibHf1lygHE_pg-KoUIQ4w,2293
flask/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
flask/scaffold.py,sha256=tiQRK-vMY5nucoN6pewXF87GaxrltsCGOgTVsT6wm7s,33443
flask/sessions.py,sha256=66oGlE-v9iac-eb54tFN3ILAjJ1FeeuHHWw98UVaoxc,15847
flask/signals.py,sha256=H7QwDciK-dtBxinjKpexpglP0E6k0MJILiFWTItfmqU,2136
flask/templating.py,sha256=1P4OzvSnA2fsJTYgQT3G4owVKsuOz8XddCiR6jMHGJ0,7419
flask/testing.py,sha256=p51f9P7jDc_IDSiZug7jypnfVcxsQrMg3B2tnjlpEFw,10596
flask/typing.py,sha256=KgxegTF9v9WvuongeF8LooIvpZPauzGrq9ZXf3gBlYc,2969
flask/views.py,sha256=bveWilivkPP-4HB9w_fOusBz6sHNIl0QTqKUFMCltzE,6738
flask/wrappers.py,sha256=Wa-bhjNdPPveSHS1dpzD_r-ayZxIYFF1DoWncKOafrk,5695
@@ -0,0 +1,5 @@
Wheel-Version: 1.0
Generator: bdist_wheel (0.37.1)
Root-Is-Purelib: true
Tag: py3-none-any

@@ -0,0 +1,2 @@
[console_scripts]
flask = flask.cli:main
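Editor's note: the `[console_scripts]` entry above is what pip turned into the `Scripts/flask.exe` launcher listed in the RECORD file earlier. Running that executable is roughly equivalent to this sketch:

```python
# rough equivalent of the generated Scripts/flask.exe launcher
from flask.cli import main

if __name__ == "__main__":
    main()  # dispatches `flask run`, `flask shell`, etc.
```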
@@ -0,0 +1 @@
flask
@@ -0,0 +1 @@
pip
@@ -0,0 +1,28 @@
Copyright 2007 Pallets

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

1.  Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.

2.  Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

3.  Neither the name of the copyright holder nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,113 @@
Metadata-Version: 2.1
Name: Jinja2
Version: 3.1.2
Summary: A very fast and expressive template engine.
Home-page: https://palletsprojects.com/p/jinja/
Author: Armin Ronacher
Author-email: armin.ronacher@active-4.com
Maintainer: Pallets
Maintainer-email: contact@palletsprojects.com
License: BSD-3-Clause
Project-URL: Donate, https://palletsprojects.com/donate
Project-URL: Documentation, https://jinja.palletsprojects.com/
Project-URL: Changes, https://jinja.palletsprojects.com/changes/
Project-URL: Source Code, https://github.com/pallets/jinja/
Project-URL: Issue Tracker, https://github.com/pallets/jinja/issues/
Project-URL: Twitter, https://twitter.com/PalletsTeam
Project-URL: Chat, https://discord.gg/pallets
Platform: UNKNOWN
Classifier: Development Status :: 5 - Production/Stable
Classifier: Environment :: Web Environment
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: BSD License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content
Classifier: Topic :: Text Processing :: Markup :: HTML
Requires-Python: >=3.7
Description-Content-Type: text/x-rst
License-File: LICENSE.rst
Requires-Dist: MarkupSafe (>=2.0)
Provides-Extra: i18n
Requires-Dist: Babel (>=2.7) ; extra == 'i18n'

Jinja
=====

Jinja is a fast, expressive, extensible templating engine. Special
placeholders in the template allow writing code similar to Python
syntax. Then the template is passed data to render the final document.

It includes:

- Template inheritance and inclusion.
- Define and import macros within templates.
- HTML templates can use autoescaping to prevent XSS from untrusted
  user input.
- A sandboxed environment can safely render untrusted templates.
- AsyncIO support for generating templates and calling async
  functions.
- I18N support with Babel.
- Templates are compiled to optimized Python code just-in-time and
  cached, or can be compiled ahead-of-time.
- Exceptions point to the correct line in templates to make debugging
  easier.
- Extensible filters, tests, functions, and even syntax.

Jinja's philosophy is that while application logic belongs in Python if
possible, it shouldn't make the template designer's job difficult by
restricting functionality too much.


Installing
----------

Install and update using `pip`_:

.. code-block:: text

    $ pip install -U Jinja2

.. _pip: https://pip.pypa.io/en/stable/getting-started/


In A Nutshell
-------------

.. code-block:: jinja

    {% extends "base.html" %}
    {% block title %}Members{% endblock %}
    {% block content %}
      <ul>
      {% for user in users %}
        <li><a href="{{ user.url }}">{{ user.username }}</a></li>
      {% endfor %}
      </ul>
    {% endblock %}


Donate
------

The Pallets organization develops and supports Jinja and other popular
packages. In order to grow the community of contributors and users, and
allow the maintainers to devote more time to the projects, `please
donate today`_.

.. _please donate today: https://palletsprojects.com/donate


Links
-----

- Documentation: https://jinja.palletsprojects.com/
- Changes: https://jinja.palletsprojects.com/changes/
- PyPI Releases: https://pypi.org/project/Jinja2/
- Source Code: https://github.com/pallets/jinja/
- Issue Tracker: https://github.com/pallets/jinja/issues/
- Website: https://palletsprojects.com/p/jinja/
- Twitter: https://twitter.com/PalletsTeam
- Chat: https://discord.gg/pallets


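Editor's note: the "In A Nutshell" template above is rendered from Python roughly like this. A minimal sketch with made-up data; real apps would use an `Environment` with a loader so `{% extends %}` can resolve `base.html`, so only the loop fragment is shown:

```python
from jinja2 import Template

# inheritance-free fragment of the README example; `users` is made-up data
template = Template(
    "<ul>{% for user in users %}"
    '<li><a href="{{ user.url }}">{{ user.username }}</a></li>'
    "{% endfor %}</ul>"
)
print(template.render(users=[{"url": "/erica", "username": "erica"}]))
```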
@@ -0,0 +1,58 @@
Jinja2-3.1.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
Jinja2-3.1.2.dist-info/LICENSE.rst,sha256=O0nc7kEF6ze6wQ-vG-JgQI_oXSUrjp3y4JefweCUQ3s,1475
Jinja2-3.1.2.dist-info/METADATA,sha256=PZ6v2SIidMNixR7MRUX9f7ZWsPwtXanknqiZUmRbh4U,3539
Jinja2-3.1.2.dist-info/RECORD,,
Jinja2-3.1.2.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
Jinja2-3.1.2.dist-info/entry_points.txt,sha256=zRd62fbqIyfUpsRtU7EVIFyiu1tPwfgO7EvPErnxgTE,59
Jinja2-3.1.2.dist-info/top_level.txt,sha256=PkeVWtLb3-CqjWi1fO29OCbj55EhX_chhKrCdrVe_zs,7
jinja2/__init__.py,sha256=8vGduD8ytwgD6GDSqpYc2m3aU-T7PKOAddvVXgGr_Fs,1927
jinja2/__pycache__/__init__.cpython-310.pyc,,
jinja2/__pycache__/_identifier.cpython-310.pyc,,
jinja2/__pycache__/async_utils.cpython-310.pyc,,
jinja2/__pycache__/bccache.cpython-310.pyc,,
jinja2/__pycache__/compiler.cpython-310.pyc,,
jinja2/__pycache__/constants.cpython-310.pyc,,
jinja2/__pycache__/debug.cpython-310.pyc,,
jinja2/__pycache__/defaults.cpython-310.pyc,,
jinja2/__pycache__/environment.cpython-310.pyc,,
jinja2/__pycache__/exceptions.cpython-310.pyc,,
jinja2/__pycache__/ext.cpython-310.pyc,,
jinja2/__pycache__/filters.cpython-310.pyc,,
jinja2/__pycache__/idtracking.cpython-310.pyc,,
jinja2/__pycache__/lexer.cpython-310.pyc,,
jinja2/__pycache__/loaders.cpython-310.pyc,,
jinja2/__pycache__/meta.cpython-310.pyc,,
jinja2/__pycache__/nativetypes.cpython-310.pyc,,
jinja2/__pycache__/nodes.cpython-310.pyc,,
jinja2/__pycache__/optimizer.cpython-310.pyc,,
jinja2/__pycache__/parser.cpython-310.pyc,,
jinja2/__pycache__/runtime.cpython-310.pyc,,
jinja2/__pycache__/sandbox.cpython-310.pyc,,
jinja2/__pycache__/tests.cpython-310.pyc,,
jinja2/__pycache__/utils.cpython-310.pyc,,
jinja2/__pycache__/visitor.cpython-310.pyc,,
jinja2/_identifier.py,sha256=_zYctNKzRqlk_murTNlzrju1FFJL7Va_Ijqqd7ii2lU,1958
jinja2/async_utils.py,sha256=dHlbTeaxFPtAOQEYOGYh_PHcDT0rsDaUJAFDl_0XtTg,2472
jinja2/bccache.py,sha256=mhz5xtLxCcHRAa56azOhphIAe19u1we0ojifNMClDio,14061
jinja2/compiler.py,sha256=Gs-N8ThJ7OWK4-reKoO8Wh1ZXz95MVphBKNVf75qBr8,72172
jinja2/constants.py,sha256=GMoFydBF_kdpaRKPoM5cl5MviquVRLVyZtfp5-16jg0,1433
jinja2/debug.py,sha256=iWJ432RadxJNnaMOPrjIDInz50UEgni3_HKuFXi2vuQ,6299
jinja2/defaults.py,sha256=boBcSw78h-lp20YbaXSJsqkAI2uN_mD_TtCydpeq5wU,1267
jinja2/environment.py,sha256=6uHIcc7ZblqOMdx_uYNKqRnnwAF0_nzbyeMP9FFtuh4,61349
jinja2/exceptions.py,sha256=ioHeHrWwCWNaXX1inHmHVblvc4haO7AXsjCp3GfWvx0,5071
jinja2/ext.py,sha256=ivr3P7LKbddiXDVez20EflcO3q2aHQwz9P_PgWGHVqE,31502
jinja2/filters.py,sha256=9js1V-h2RlyW90IhLiBGLM2U-k6SCy2F4BUUMgB3K9Q,53509
jinja2/idtracking.py,sha256=GfNmadir4oDALVxzn3DL9YInhJDr69ebXeA2ygfuCGA,10704
jinja2/lexer.py,sha256=DW2nX9zk-6MWp65YR2bqqj0xqCvLtD-u9NWT8AnFRxQ,29726
jinja2/loaders.py,sha256=BfptfvTVpClUd-leMkHczdyPNYFzp_n7PKOJ98iyHOg,23207
jinja2/meta.py,sha256=GNPEvifmSaU3CMxlbheBOZjeZ277HThOPUTf1RkppKQ,4396
jinja2/nativetypes.py,sha256=DXgORDPRmVWgy034H0xL8eF7qYoK3DrMxs-935d0Fzk,4226
jinja2/nodes.py,sha256=i34GPRAZexXMT6bwuf5SEyvdmS-bRCy9KMjwN5O6pjk,34550
jinja2/optimizer.py,sha256=tHkMwXxfZkbfA1KmLcqmBMSaz7RLIvvItrJcPoXTyD8,1650
jinja2/parser.py,sha256=nHd-DFHbiygvfaPtm9rcQXJChZG7DPsWfiEsqfwKerY,39595
jinja2/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
jinja2/runtime.py,sha256=5CmD5BjbEJxSiDNTFBeKCaq8qU4aYD2v6q2EluyExms,33476
jinja2/sandbox.py,sha256=Y0xZeXQnH6EX5VjaV2YixESxoepnRbW_3UeQosaBU3M,14584
jinja2/tests.py,sha256=Am5Z6Lmfr2XaH_npIfJJ8MdXtWsbLjMULZJulTAj30E,5905
jinja2/utils.py,sha256=u9jXESxGn8ATZNVolwmkjUVu4SA-tLgV0W7PcSfPfdQ,23965
jinja2/visitor.py,sha256=MH14C6yq24G_KVtWzjwaI7Wg14PCJIYlWW1kpkxYak0,3568
@@ -0,0 +1,5 @@
Wheel-Version: 1.0
Generator: bdist_wheel (0.37.1)
Root-Is-Purelib: true
Tag: py3-none-any

@@ -0,0 +1,2 @@
[babel.extractors]
jinja2 = jinja2.ext:babel_extract[i18n]
@@ -0,0 +1 @@
jinja2
@@ -0,0 +1 @@
pip
@@ -0,0 +1,28 @@
Copyright 2010 Pallets

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

1.  Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.

2.  Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

3.  Neither the name of the copyright holder nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,101 @@
Metadata-Version: 2.1
Name: MarkupSafe
Version: 2.1.1
Summary: Safely add untrusted strings to HTML/XML markup.
Home-page: https://palletsprojects.com/p/markupsafe/
Author: Armin Ronacher
Author-email: armin.ronacher@active-4.com
Maintainer: Pallets
Maintainer-email: contact@palletsprojects.com
License: BSD-3-Clause
Project-URL: Donate, https://palletsprojects.com/donate
Project-URL: Documentation, https://markupsafe.palletsprojects.com/
Project-URL: Changes, https://markupsafe.palletsprojects.com/changes/
Project-URL: Source Code, https://github.com/pallets/markupsafe/
Project-URL: Issue Tracker, https://github.com/pallets/markupsafe/issues/
Project-URL: Twitter, https://twitter.com/PalletsTeam
Project-URL: Chat, https://discord.gg/pallets
Platform: UNKNOWN
Classifier: Development Status :: 5 - Production/Stable
Classifier: Environment :: Web Environment
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: BSD License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content
Classifier: Topic :: Text Processing :: Markup :: HTML
Requires-Python: >=3.7
Description-Content-Type: text/x-rst
License-File: LICENSE.rst

MarkupSafe
==========

MarkupSafe implements a text object that escapes characters so it is
safe to use in HTML and XML. Characters that have special meanings are
replaced so that they display as the actual characters. This mitigates
injection attacks, meaning untrusted user input can safely be displayed
on a page.


Installing
----------

Install and update using `pip`_:

.. code-block:: text

    pip install -U MarkupSafe

.. _pip: https://pip.pypa.io/en/stable/getting-started/


Examples
--------

.. code-block:: pycon

    >>> from markupsafe import Markup, escape

    >>> # escape replaces special characters and wraps in Markup
    >>> escape("<script>alert(document.cookie);</script>")
    Markup('&lt;script&gt;alert(document.cookie);&lt;/script&gt;')

    >>> # wrap in Markup to mark text "safe" and prevent escaping
    >>> Markup("<strong>Hello</strong>")
    Markup('<strong>Hello</strong>')

    >>> escape(Markup("<strong>Hello</strong>"))
    Markup('<strong>Hello</strong>')

    >>> # Markup is a str subclass
    >>> # methods and operators escape their arguments
    >>> template = Markup("Hello <em>{name}</em>")
    >>> template.format(name='"World"')
    Markup('Hello <em>&#34;World&#34;</em>')


Donate
------

The Pallets organization develops and supports MarkupSafe and other
popular packages. In order to grow the community of contributors and
users, and allow the maintainers to devote more time to the projects,
`please donate today`_.

.. _please donate today: https://palletsprojects.com/donate


Links
-----

- Documentation: https://markupsafe.palletsprojects.com/
- Changes: https://markupsafe.palletsprojects.com/changes/
- PyPI Releases: https://pypi.org/project/MarkupSafe/
- Source Code: https://github.com/pallets/markupsafe/
- Issue Tracker: https://github.com/pallets/markupsafe/issues/
- Website: https://palletsprojects.com/p/markupsafe/
- Twitter: https://twitter.com/PalletsTeam
- Chat: https://discord.gg/pallets


@@ -0,0 +1,14 @@
MarkupSafe-2.1.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
MarkupSafe-2.1.1.dist-info/LICENSE.rst,sha256=RjHsDbX9kKVH4zaBcmTGeYIUM4FG-KyUtKV_lu6MnsQ,1503
MarkupSafe-2.1.1.dist-info/METADATA,sha256=DC93VszmzjLQcrVChRUjtW4XbUwjTdbaplpgdlbFdbs,3242
MarkupSafe-2.1.1.dist-info/RECORD,,
MarkupSafe-2.1.1.dist-info/WHEEL,sha256=C6CHup2HLC2Rld8AL5u9w89MYULjdaP5k0k7SG83CcI,102
MarkupSafe-2.1.1.dist-info/top_level.txt,sha256=qy0Plje5IJuvsCBjejJyhDCjEAdcDLK_2agVcex8Z6U,11
markupsafe/__init__.py,sha256=XGdbhy_OLrsQ5ZoFV3V6HQ3GeJ_ojcabRl_8yqehISk,9579
markupsafe/__pycache__/__init__.cpython-310.pyc,,
markupsafe/__pycache__/_native.cpython-310.pyc,,
markupsafe/_native.py,sha256=_Q7UsXCOvgdonCgqG3l5asANI6eo50EKnDM-mlwEC5M,1776
markupsafe/_speedups.c,sha256=n3jzzaJwXcoN8nTFyA53f3vSqsWK2vujI-v6QYifjhQ,7403
markupsafe/_speedups.cp310-win_amd64.pyd,sha256=Mh72D6F52No2JwGW5GRZfeQKwRrES4u8uZFnw_nN4vk,15872
markupsafe/_speedups.pyi,sha256=f5QtwIOP0eLrxh2v5p6SmaYmlcHIGIfmz0DovaqL0OU,238
markupsafe/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@ -0,0 +1,5 @@
|
|||||||
|
Wheel-Version: 1.0
|
||||||
|
Generator: bdist_wheel (0.37.0)
|
||||||
|
Root-Is-Purelib: false
|
||||||
|
Tag: cp310-cp310-win_amd64
|
||||||
|
|
@ -0,0 +1 @@
|
|||||||
|
markupsafe
|
@ -0,0 +1 @@
|
|||||||
|
pip
|
@ -0,0 +1,28 @@
|
|||||||
|
Copyright 2007 Pallets
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
3. Neither the name of the copyright holder nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||||
|
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
@ -0,0 +1,126 @@
Metadata-Version: 2.1
Name: Werkzeug
Version: 2.2.2
Summary: The comprehensive WSGI web application library.
Home-page: https://palletsprojects.com/p/werkzeug/
Author: Armin Ronacher
Author-email: armin.ronacher@active-4.com
Maintainer: Pallets
Maintainer-email: contact@palletsprojects.com
License: BSD-3-Clause
Project-URL: Donate, https://palletsprojects.com/donate
Project-URL: Documentation, https://werkzeug.palletsprojects.com/
Project-URL: Changes, https://werkzeug.palletsprojects.com/changes/
Project-URL: Source Code, https://github.com/pallets/werkzeug/
Project-URL: Issue Tracker, https://github.com/pallets/werkzeug/issues/
Project-URL: Twitter, https://twitter.com/PalletsTeam
Project-URL: Chat, https://discord.gg/pallets
Classifier: Development Status :: 5 - Production/Stable
Classifier: Environment :: Web Environment
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: BSD License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content
Classifier: Topic :: Internet :: WWW/HTTP :: WSGI
Classifier: Topic :: Internet :: WWW/HTTP :: WSGI :: Application
Classifier: Topic :: Internet :: WWW/HTTP :: WSGI :: Middleware
Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
Requires-Python: >=3.7
Description-Content-Type: text/x-rst
License-File: LICENSE.rst
Requires-Dist: MarkupSafe (>=2.1.1)
Provides-Extra: watchdog
Requires-Dist: watchdog ; extra == 'watchdog'

Werkzeug
========

*werkzeug* German noun: "tool". Etymology: *werk* ("work"), *zeug* ("stuff")

Werkzeug is a comprehensive `WSGI`_ web application library. It began as
a simple collection of various utilities for WSGI applications and has
become one of the most advanced WSGI utility libraries.

It includes:

- An interactive debugger that allows inspecting stack traces and
  source code in the browser with an interactive interpreter for any
  frame in the stack.
- A full-featured request object with objects to interact with
  headers, query args, form data, files, and cookies.
- A response object that can wrap other WSGI applications and handle
  streaming data.
- A routing system for matching URLs to endpoints and generating URLs
  for endpoints, with an extensible system for capturing variables
  from URLs.
- HTTP utilities to handle entity tags, cache control, dates, user
  agents, cookies, files, and more.
- A threaded WSGI server for use while developing applications
  locally.
- A test client for simulating HTTP requests during testing without
  requiring running a server.

Werkzeug doesn't enforce any dependencies. It is up to the developer to
choose a template engine, database adapter, and even how to handle
requests. It can be used to build all sorts of end user applications
such as blogs, wikis, or bulletin boards.

`Flask`_ wraps Werkzeug, using it to handle the details of WSGI while
providing more structure and patterns for defining powerful
applications.

.. _WSGI: https://wsgi.readthedocs.io/en/latest/
.. _Flask: https://www.palletsprojects.com/p/flask/


Installing
----------

Install and update using `pip`_:

.. code-block:: text

    pip install -U Werkzeug

.. _pip: https://pip.pypa.io/en/stable/getting-started/


A Simple Example
----------------

.. code-block:: python

    from werkzeug.wrappers import Request, Response

    @Request.application
    def application(request):
        return Response('Hello, World!')

    if __name__ == '__main__':
        from werkzeug.serving import run_simple
        run_simple('localhost', 4000, application)
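
As a supplementary sketch (not part of the upstream README), the same
application can be exercised with the test client mentioned above,
without starting a server:

.. code-block:: python

    from werkzeug.test import Client

    # wraps the WSGI callable defined in the example above
    client = Client(application)
    response = client.get("/")
    print(response.status)                  # '200 OK'
    print(response.get_data(as_text=True))  # 'Hello, World!'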


Donate
------

The Pallets organization develops and supports Werkzeug and other
popular packages. In order to grow the community of contributors and
users, and allow the maintainers to devote more time to the projects,
`please donate today`_.

.. _please donate today: https://palletsprojects.com/donate


Links
-----

- Documentation: https://werkzeug.palletsprojects.com/
- Changes: https://werkzeug.palletsprojects.com/changes/
- PyPI Releases: https://pypi.org/project/Werkzeug/
- Source Code: https://github.com/pallets/werkzeug/
- Issue Tracker: https://github.com/pallets/werkzeug/issues/
- Website: https://palletsprojects.com/p/werkzeug/
- Twitter: https://twitter.com/PalletsTeam
- Chat: https://discord.gg/pallets
@ -0,0 +1,98 @@
Werkzeug-2.2.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
Werkzeug-2.2.2.dist-info/LICENSE.rst,sha256=O0nc7kEF6ze6wQ-vG-JgQI_oXSUrjp3y4JefweCUQ3s,1475
Werkzeug-2.2.2.dist-info/METADATA,sha256=hz42ndovEQQy3rwXKZDwR7LA4UNthKegxf_7xIQrjsM,4416
Werkzeug-2.2.2.dist-info/RECORD,,
Werkzeug-2.2.2.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
Werkzeug-2.2.2.dist-info/top_level.txt,sha256=QRyj2VjwJoQkrwjwFIOlB8Xg3r9un0NtqVHQF-15xaw,9
werkzeug/__init__.py,sha256=UP218Ddd2NYm1dUhTlhvGRQytzAx1Ms1A716UKiPOYk,188
werkzeug/__pycache__/__init__.cpython-310.pyc,,
werkzeug/__pycache__/_internal.cpython-310.pyc,,
werkzeug/__pycache__/_reloader.cpython-310.pyc,,
werkzeug/__pycache__/datastructures.cpython-310.pyc,,
werkzeug/__pycache__/exceptions.cpython-310.pyc,,
werkzeug/__pycache__/formparser.cpython-310.pyc,,
werkzeug/__pycache__/http.cpython-310.pyc,,
werkzeug/__pycache__/local.cpython-310.pyc,,
werkzeug/__pycache__/security.cpython-310.pyc,,
werkzeug/__pycache__/serving.cpython-310.pyc,,
werkzeug/__pycache__/test.cpython-310.pyc,,
werkzeug/__pycache__/testapp.cpython-310.pyc,,
werkzeug/__pycache__/urls.cpython-310.pyc,,
werkzeug/__pycache__/user_agent.cpython-310.pyc,,
werkzeug/__pycache__/utils.cpython-310.pyc,,
werkzeug/__pycache__/wsgi.cpython-310.pyc,,
werkzeug/_internal.py,sha256=g8PHJz2z39I3x0vwTvTKbXIg0eUQqGF9UtUzDMWT0Qw,16222
werkzeug/_reloader.py,sha256=lYStlIDduTxBOB8BSozy_44HQ7YT5fup-x3uuac1-2o,14331
werkzeug/datastructures.py,sha256=T1SRE_KRuNz9Q7P-Ck4PyKPyil1NOx9zDuNMLgrN1Z0,97083
werkzeug/datastructures.pyi,sha256=HRzDLc7A6qnwluhNqn6AT76CsLZIkAbVVqxn0AbfV-s,34506
werkzeug/debug/__init__.py,sha256=Gpq6OpS6mHwHk0mJkHc2fWvvjo6ccJVS9QJwJgoeb9I,18893
werkzeug/debug/__pycache__/__init__.cpython-310.pyc,,
werkzeug/debug/__pycache__/console.cpython-310.pyc,,
werkzeug/debug/__pycache__/repr.cpython-310.pyc,,
werkzeug/debug/__pycache__/tbtools.cpython-310.pyc,,
werkzeug/debug/console.py,sha256=dechqiCtHfs0AQZWZofUC1S97tCuvwDgT0gdha5KwWM,6208
werkzeug/debug/repr.py,sha256=FFczy4yhVfEQjW99HuZtUce-ebtJWMjp9GnfasXa0KA,9488
werkzeug/debug/shared/ICON_LICENSE.md,sha256=DhA6Y1gUl5Jwfg0NFN9Rj4VWITt8tUx0IvdGf0ux9-s,222
werkzeug/debug/shared/console.png,sha256=bxax6RXXlvOij_KeqvSNX0ojJf83YbnZ7my-3Gx9w2A,507
werkzeug/debug/shared/debugger.js,sha256=tg42SZs1SVmYWZ-_Fj5ELK5-FLHnGNQrei0K2By8Bw8,10521
werkzeug/debug/shared/less.png,sha256=-4-kNRaXJSONVLahrQKUxMwXGm9R4OnZ9SxDGpHlIR4,191
werkzeug/debug/shared/more.png,sha256=GngN7CioHQoV58rH6ojnkYi8c_qED2Aka5FO5UXrReY,200
werkzeug/debug/shared/style.css,sha256=-xSxzUEZGw_IqlDR5iZxitNl8LQUjBM-_Y4UAvXVH8g,6078
werkzeug/debug/tbtools.py,sha256=Fsmlk6Ao3CcXm9iX7i_8MhCp2SQZ8uHm8Cf5wacnlW4,13293
werkzeug/exceptions.py,sha256=5MFy6RyaU4nokoYzdDafloY51QUDIGVNKeK_FORUFS0,26543
werkzeug/formparser.py,sha256=rLEu_ZwVpvqshZg6E4Qiv36QsmzmCytTijBeGX3dDGk,16056
werkzeug/http.py,sha256=i_LrIU9KsOz27zfkwKIK6eifFuFMKgSuW15k57HbMiE,42162
werkzeug/local.py,sha256=1IRMV9MFrauLaZeliF0Md1n7ZOcOKLbS03bnQ8Gz5WY,22326
werkzeug/middleware/__init__.py,sha256=qfqgdT5npwG9ses3-FXQJf3aB95JYP1zchetH_T3PUw,500
werkzeug/middleware/__pycache__/__init__.cpython-310.pyc,,
werkzeug/middleware/__pycache__/dispatcher.cpython-310.pyc,,
werkzeug/middleware/__pycache__/http_proxy.cpython-310.pyc,,
werkzeug/middleware/__pycache__/lint.cpython-310.pyc,,
werkzeug/middleware/__pycache__/profiler.cpython-310.pyc,,
werkzeug/middleware/__pycache__/proxy_fix.cpython-310.pyc,,
werkzeug/middleware/__pycache__/shared_data.cpython-310.pyc,,
werkzeug/middleware/dispatcher.py,sha256=Fh_w-KyWnTSYF-Lfv5dimQ7THSS7afPAZMmvc4zF1gg,2580
werkzeug/middleware/http_proxy.py,sha256=HE8VyhS7CR-E1O6_9b68huv8FLgGGR1DLYqkS3Xcp3Q,7558
werkzeug/middleware/lint.py,sha256=Sr6gV4royDs6ezkqv5trRAyKMDQ60KaEq3-tQ3opUvw,13968
werkzeug/middleware/profiler.py,sha256=QkXk7cqnaPnF8wQu-5SyPCIOT3_kdABUBorQOghVNOA,4899
werkzeug/middleware/proxy_fix.py,sha256=l7LC_LDu0Yd4SvUxS5SFigAJMzcIOGm6LNKl9IXJBSU,6974
werkzeug/middleware/shared_data.py,sha256=fXjrEkuqxUVLG1DLrOdQLc96QQdjftCBZ1oM5oK89h4,9528
werkzeug/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
werkzeug/routing/__init__.py,sha256=HpvahY7WwkLdV4Cq3Bsc3GrqNon4u6t8-vhbb9E5o00,4819
werkzeug/routing/__pycache__/__init__.cpython-310.pyc,,
werkzeug/routing/__pycache__/converters.cpython-310.pyc,,
werkzeug/routing/__pycache__/exceptions.cpython-310.pyc,,
werkzeug/routing/__pycache__/map.cpython-310.pyc,,
werkzeug/routing/__pycache__/matcher.cpython-310.pyc,,
werkzeug/routing/__pycache__/rules.cpython-310.pyc,,
werkzeug/routing/converters.py,sha256=05bkekg64vLC6mqqK4ddBh589WH9yBsjtW8IJhdUBvw,6968
werkzeug/routing/exceptions.py,sha256=RklUDL9ajOv2fTcRNj4pb18Bs4Y-GKk4rIeTSfsqkks,4737
werkzeug/routing/map.py,sha256=XN4ZjzEF1SfMxtdov89SDE-1_U78KVnnobTfnHzqbmE,36757
werkzeug/routing/matcher.py,sha256=U8xZTB3e5f3TgbkxdDyVuyxK4w72l1lo_b3tdG2zNrc,7122
werkzeug/routing/rules.py,sha256=v27RaR5H3sIPRdJ_pdEfOBMN6EivFVpmFzJk7aizdyw,31072
werkzeug/sansio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
werkzeug/sansio/__pycache__/__init__.cpython-310.pyc,,
werkzeug/sansio/__pycache__/http.cpython-310.pyc,,
werkzeug/sansio/__pycache__/multipart.cpython-310.pyc,,
werkzeug/sansio/__pycache__/request.cpython-310.pyc,,
werkzeug/sansio/__pycache__/response.cpython-310.pyc,,
werkzeug/sansio/__pycache__/utils.cpython-310.pyc,,
werkzeug/sansio/http.py,sha256=9eORg44CDxpmV9i_U_pZ_NR8gdc9UXFCdE7EAP1v-c0,5162
werkzeug/sansio/multipart.py,sha256=Uyrg2U6s2oft8LXOyuTvFCWTLOEr7INVW8zFTXNwZ7A,9756
werkzeug/sansio/request.py,sha256=SiGcx2cz-l81TlCCrKrT2fePqC64hN8fSg5Ig6J6vRs,20175
werkzeug/sansio/response.py,sha256=UTl-teQDDjovrZMkjj3ZQsHw-JtiFak5JfKEk1_vBYU,26026
werkzeug/sansio/utils.py,sha256=EjbqdHdT-JZWgjUQaaWSgBUIRprXUkrsMQQqJlJHpVU,4847
werkzeug/security.py,sha256=vrBofh4WZZoUo1eAdJ6F1DrzVRlYauGS2CUDYpbQKj8,4658
werkzeug/serving.py,sha256=18pfjrHw8b5UCgPPo1ZEoxlYZZ5UREl4jQ9f8LGWMAo,38458
werkzeug/test.py,sha256=t7T5G-HciIlv1ZXtlydFVpow0VrXnJ_Y3yyEB7T0_Ww,48125
werkzeug/testapp.py,sha256=RJhT_2JweNiMKe304N3bF1zaIeMpRx-CIMERdeydfTY,9404
werkzeug/urls.py,sha256=Q9Si-eVh7yxk3rwkzrwGRm146FXVXgg9lBP3k0HUfVM,36600
werkzeug/user_agent.py,sha256=WclZhpvgLurMF45hsioSbS75H1Zb4iMQGKN3_yZ2oKo,1420
werkzeug/utils.py,sha256=OYdB2cZPYYgq3C0EVKMIv05BrYzzr9xdefW0H00_IVo,24936
werkzeug/wrappers/__init__.py,sha256=kGyK7rOud3qCxll_jFyW15YarJhj1xtdf3ocx9ZheB8,120
werkzeug/wrappers/__pycache__/__init__.cpython-310.pyc,,
werkzeug/wrappers/__pycache__/request.cpython-310.pyc,,
werkzeug/wrappers/__pycache__/response.cpython-310.pyc,,
werkzeug/wrappers/request.py,sha256=UQ559KkGS0Po6HTBgvKMlk1_AsNw5zstzm8o_dRrfdQ,23415
werkzeug/wrappers/response.py,sha256=c2HUXrrt5Sf8-XEB1fUXxm6jp7Lu80KR0A_tbQFvw1Q,34750
werkzeug/wsgi.py,sha256=sgkFCzhl23hlSmbvjxbI-hVEjSlPuEBGTDAHmXFcAts,34732
@ -0,0 +1,5 @@
Wheel-Version: 1.0
Generator: bdist_wheel (0.37.1)
Root-Is-Purelib: true
Tag: py3-none-any

@ -0,0 +1 @@
werkzeug
Binary file not shown.
Binary file not shown.
@ -0,0 +1,128 @@
import sys
import os
import re
import importlib
import warnings


is_pypy = '__pypy__' in sys.builtin_module_names


warnings.filterwarnings('ignore',
                        r'.+ distutils\b.+ deprecated',
                        DeprecationWarning)


def warn_distutils_present():
    if 'distutils' not in sys.modules:
        return
    if is_pypy and sys.version_info < (3, 7):
        # PyPy for 3.6 unconditionally imports distutils, so bypass the warning
        # https://foss.heptapod.net/pypy/pypy/-/blob/be829135bc0d758997b3566062999ee8b23872b4/lib-python/3/site.py#L250
        return
    warnings.warn(
        "Distutils was imported before Setuptools, but importing Setuptools "
        "also replaces the `distutils` module in `sys.modules`. This may lead "
        "to undesirable behaviors or errors. To avoid these issues, avoid "
        "using distutils directly, ensure that setuptools is installed in the "
        "traditional way (e.g. not an editable install), and/or make sure "
        "that setuptools is always imported before distutils.")


def clear_distutils():
    if 'distutils' not in sys.modules:
        return
    warnings.warn("Setuptools is replacing distutils.")
    mods = [name for name in sys.modules if re.match(r'distutils\b', name)]
    for name in mods:
        del sys.modules[name]


def enabled():
    """
    Allow selection of distutils by environment variable.
    """
    which = os.environ.get('SETUPTOOLS_USE_DISTUTILS', 'stdlib')
    return which == 'local'


def ensure_local_distutils():
    clear_distutils()
    distutils = importlib.import_module('setuptools._distutils')
    distutils.__name__ = 'distutils'
    sys.modules['distutils'] = distutils

    # sanity check that submodules load as expected
    core = importlib.import_module('distutils.core')
    assert '_distutils' in core.__file__, core.__file__


def do_override():
    """
    Ensure that the local copy of distutils is preferred over stdlib.

    See https://github.com/pypa/setuptools/issues/417#issuecomment-392298401
    for more motivation.
    """
    if enabled():
        warn_distutils_present()
        ensure_local_distutils()


class DistutilsMetaFinder:
    def find_spec(self, fullname, path, target=None):
        if path is not None:
            return

        method_name = 'spec_for_{fullname}'.format(**locals())
        method = getattr(self, method_name, lambda: None)
        return method()

    def spec_for_distutils(self):
        import importlib.abc
        import importlib.util

        class DistutilsLoader(importlib.abc.Loader):

            def create_module(self, spec):
                return importlib.import_module('setuptools._distutils')

            def exec_module(self, module):
                pass

        return importlib.util.spec_from_loader('distutils', DistutilsLoader())

    def spec_for_pip(self):
        """
        Ensure stdlib distutils when running under pip.
        See pypa/pip#8761 for rationale.
        """
        if self.pip_imported_during_build():
            return
        clear_distutils()
        self.spec_for_distutils = lambda: None

    @staticmethod
    def pip_imported_during_build():
        """
        Detect if pip is being imported in a build script. Ref #2355.
        """
        import traceback
        return any(
            frame.f_globals['__file__'].endswith('setup.py')
            for frame, line in traceback.walk_stack(None)
        )


DISTUTILS_FINDER = DistutilsMetaFinder()


def add_shim():
    sys.meta_path.insert(0, DISTUTILS_FINDER)


def remove_shim():
    try:
        sys.meta_path.remove(DISTUTILS_FINDER)
    except ValueError:
        pass
Binary file not shown.
Binary file not shown.
@ -0,0 +1 @@
__import__('_distutils_hack').do_override()
@ -0,0 +1,48 @@
Main contributors
-----------------
- Hideo Hattori (https://github.com/hhatto)
- Steven Myint (https://github.com/myint)
- Bill Wendling (https://github.com/gwelymernans)

Patches
-------
- Fraser Tweedale (https://github.com/frasertweedale)
- clach04 (https://github.com/clach04)
- Marc Abramowitz (https://github.com/msabramo)
- dellis23 (https://github.com/dellis23)
- Sam Vilain (https://github.com/samv)
- Florent Xicluna (https://github.com/florentx)
- Andras Tim (https://github.com/andras-tim)
- tomscytale (https://github.com/tomscytale)
- Filip Noetzel (https://github.com/peritus)
- Erik Bray (https://github.com/iguananaut)
- Christopher Medrela (https://github.com/chrismedrela)
- 小明 (https://github.com/dongweiming)
- Andy Hayden (https://github.com/hayd)
- Fabio Zadrozny (https://github.com/fabioz)
- Alex Chernetz (https://github.com/achernet)
- Marc Schlaich (https://github.com/schlamar)
- E. M. Bray (https://github.com/embray)
- Thomas Hisch (https://github.com/thisch)
- Florian Best (https://github.com/spaceone)
- Ian Clark (https://github.com/evenicoulddoit)
- Khairi Hafsham (https://github.com/khairihafsham)
- Neil Halelamien (https://github.com/neilsh)
- Hashem Nasarat (https://github.com/Hnasar)
- Hugo van Kemenade (https://github.com/hugovk)
- gmbnomis (https://github.com/gmbnomis)
- Samuel Lelièvre (https://github.com/slel)
- bigredengineer (https://github.com/bigredengineer)
- Kai Chen (https://github.com/kx-chen)
- Anthony Sottile (https://github.com/asottile)
- 秋葉 (https://github.com/Hanaasagi)
- Christian Clauss (https://github.com/cclauss)
- tobixx (https://github.com/tobixx)
- bigredengineer (https://github.com/bigredengineer)
- Bastien Gérard (https://github.com/bagerard)
- nicolasbonifas (https://github.com/nicolasbonifas)
- Andrii Yurchuk (https://github.com/Ch00k)
- José M. Guisado (https://github.com/pvxe)
- Dai Truong (https://github.com/NovaDev94)
- jnozsc (https://github.com/jnozsc)
- Edwin Shepherd (https://github.com/shardros)
@ -0,0 +1 @@
pip
@ -0,0 +1,23 @@
Copyright (C) 2010-2011 Hideo Hattori
Copyright (C) 2011-2013 Hideo Hattori, Steven Myint
Copyright (C) 2013-2016 Hideo Hattori, Steven Myint, Bill Wendling

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@ -0,0 +1,475 @@
Metadata-Version: 2.1
Name: autopep8
Version: 1.7.0
Summary: A tool that automatically formats Python code to conform to the PEP 8 style guide
Home-page: https://github.com/hhatto/autopep8
Author: Hideo Hattori
Author-email: hhatto.jp@gmail.com
License: Expat License
Keywords: automation,pep8,format,pycodestyle
Classifier: Development Status :: 5 - Production/Stable
Classifier: Environment :: Console
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: Software Development :: Quality Assurance
License-File: LICENSE
License-File: AUTHORS.rst
Requires-Dist: pycodestyle (>=2.9.1)
Requires-Dist: toml

========
autopep8
========

.. image:: https://img.shields.io/pypi/v/autopep8.svg
    :target: https://pypi.org/project/autopep8/
    :alt: PyPI Version

.. image:: https://github.com/hhatto/autopep8/workflows/Python%20package/badge.svg
    :target: https://github.com/hhatto/autopep8/actions
    :alt: Build status

.. image:: https://codecov.io/gh/hhatto/autopep8/branch/main/graph/badge.svg
    :target: https://codecov.io/gh/hhatto/autopep8
    :alt: Code Coverage

autopep8 automatically formats Python code to conform to the `PEP 8`_ style
guide. It uses the pycodestyle_ utility to determine what parts of the code
need to be formatted. autopep8 is capable of fixing most of the formatting
issues_ that can be reported by pycodestyle.

.. _PEP 8: https://www.python.org/dev/peps/pep-0008/
.. _issues: https://pycodestyle.readthedocs.org/en/latest/intro.html#error-codes

.. contents::


Installation
============

From pip::

    $ pip install --upgrade autopep8

Consider using the ``--user`` option_.

.. _option: https://pip.pypa.io/en/latest/user_guide/#user-installs


Requirements
============

autopep8 requires pycodestyle_.

.. _pycodestyle: https://github.com/PyCQA/pycodestyle


Usage
=====

To modify a file in place (with aggressive level 2)::

    $ autopep8 --in-place --aggressive --aggressive <filename>

Before running autopep8.

.. code-block:: python

    import math, sys;

    def example1():
        ####This is a long comment. This should be wrapped to fit within 72 characters.
        some_tuple=( 1,2, 3,'a' );
        some_variable={'long':'Long code lines should be wrapped within 79 characters.',
        'other':[math.pi, 100,200,300,9876543210,'This is a long string that goes on'],
        'more':{'inner':'This whole logical line should be wrapped.',some_tuple:[1,
        20,300,40000,500000000,60000000000000000]}}
        return (some_tuple, some_variable)
    def example2(): return {'has_key() is deprecated':True}.has_key({'f':2}.has_key(''));
    class Example3( object ):
        def __init__ ( self, bar ):
            #Comments should have a space after the hash.
            if bar : bar+=1; bar=bar* bar ; return bar
            else:
                some_string = """
                       Indentation in multiline strings should not be touched.
          Only actual code should be reindented.
          """
                return (sys.path, some_string)

After running autopep8.

.. code-block:: python

    import math
    import sys


    def example1():
        # This is a long comment. This should be wrapped to fit within 72
        # characters.
        some_tuple = (1, 2, 3, 'a')
        some_variable = {
            'long': 'Long code lines should be wrapped within 79 characters.',
            'other': [
                math.pi,
                100,
                200,
                300,
                9876543210,
                'This is a long string that goes on'],
            'more': {
                'inner': 'This whole logical line should be wrapped.',
                some_tuple: [
                    1,
                    20,
                    300,
                    40000,
                    500000000,
                    60000000000000000]}}
        return (some_tuple, some_variable)


    def example2(): return ('' in {'f': 2}) in {'has_key() is deprecated': True}


    class Example3(object):
        def __init__(self, bar):
            # Comments should have a space after the hash.
            if bar:
                bar += 1
                bar = bar * bar
                return bar
            else:
                some_string = """
                       Indentation in multiline strings should not be touched.
          Only actual code should be reindented.
          """
                return (sys.path, some_string)

Options::

    usage: autopep8 [-h] [--version] [-v] [-d] [-i] [--global-config filename]
                    [--ignore-local-config] [-r] [-j n] [-p n] [-a]
                    [--experimental] [--exclude globs] [--list-fixes]
                    [--ignore errors] [--select errors] [--max-line-length n]
                    [--line-range line line] [--hang-closing] [--exit-code]
                    [files [files ...]]

    Automatically formats Python code to conform to the PEP 8 style guide.

    positional arguments:
      files                 files to format or '-' for standard in

    optional arguments:
      -h, --help            show this help message and exit
      --version             show program's version number and exit
      -v, --verbose         print verbose messages; multiple -v result in more
                            verbose messages
      -d, --diff            print the diff for the fixed source
      -i, --in-place        make changes to files in place
      --global-config filename
                            path to a global pep8 config file; if this file does
                            not exist then this is ignored (default:
                            ~/.config/pep8)
      --ignore-local-config
                            don't look for and apply local config files; if not
                            passed, defaults are updated with any config files in
                            the project's root directory
      -r, --recursive       run recursively over directories; must be used with
                            --in-place or --diff
      -j n, --jobs n        number of parallel jobs; match CPU count if value is
                            less than 1
      -p n, --pep8-passes n
                            maximum number of additional pep8 passes (default:
                            infinite)
      -a, --aggressive      enable non-whitespace changes; multiple -a result in
                            more aggressive changes
      --experimental        enable experimental fixes
      --exclude globs       exclude file/directory names that match these comma-
                            separated globs
      --list-fixes          list codes for fixes; used by --ignore and --select
      --ignore errors       do not fix these errors/warnings (default:
                            E226,E24,W50,W690)
      --select errors       fix only these errors/warnings (e.g. E4,W)
      --max-line-length n   set maximum allowed line length (default: 79)
      --line-range line line, --range line line
                            only fix errors found within this inclusive range of
                            line numbers (e.g. 1 99); line numbers are indexed at
                            1
      --hang-closing        hang-closing option passed to pycodestyle
      --exit-code           change to behavior of exit code. default behavior of
                            return value, 0 is no differences, 1 is error exit.
                            return 2 when add this option. 2 is exists
                            differences.


Features
========

autopep8 fixes the following issues_ reported by pycodestyle_::

    E101 - Reindent all lines.
    E11  - Fix indentation.
    E121 - Fix indentation to be a multiple of four.
    E122 - Add absent indentation for hanging indentation.
    E123 - Align closing bracket to match opening bracket.
    E124 - Align closing bracket to match visual indentation.
    E125 - Indent to distinguish line from next logical line.
    E126 - Fix over-indented hanging indentation.
    E127 - Fix visual indentation.
    E128 - Fix visual indentation.
    E129 - Fix visual indentation.
    E131 - Fix hanging indent for unaligned continuation line.
    E133 - Fix missing indentation for closing bracket.
    E20  - Remove extraneous whitespace.
    E211 - Remove extraneous whitespace.
    E22  - Fix extraneous whitespace around keywords.
    E224 - Remove extraneous whitespace around operator.
    E225 - Fix missing whitespace around operator.
    E226 - Fix missing whitespace around arithmetic operator.
    E227 - Fix missing whitespace around bitwise/shift operator.
    E228 - Fix missing whitespace around modulo operator.
    E231 - Add missing whitespace.
    E241 - Fix extraneous whitespace around keywords.
    E242 - Remove extraneous whitespace around operator.
    E251 - Remove whitespace around parameter '=' sign.
    E252 - Missing whitespace around parameter equals.
    E26  - Fix spacing after comment hash for inline comments.
    E265 - Fix spacing after comment hash for block comments.
    E266 - Fix too many leading '#' for block comments.
    E27  - Fix extraneous whitespace around keywords.
    E301 - Add missing blank line.
    E302 - Add missing 2 blank lines.
    E303 - Remove extra blank lines.
    E304 - Remove blank line following function decorator.
    E305 - Expected 2 blank lines after end of function or class.
    E306 - Expected 1 blank line before a nested definition.
    E401 - Put imports on separate lines.
    E402 - Fix module level import not at top of file.
    E501 - Try to make lines fit within --max-line-length characters.
    E502 - Remove extraneous escape of newline.
    E701 - Put colon-separated compound statement on separate lines.
    E70  - Put semicolon-separated compound statement on separate lines.
    E711 - Fix comparison with None.
    E712 - Fix comparison with boolean.
    E713 - Use 'not in' for test for membership.
    E714 - Use 'is not' test for object identity.
    E721 - Use "isinstance()" instead of comparing types directly.
    E722 - Fix bare except.
    E731 - Use a def instead of assigning a lambda expression.
    W291 - Remove trailing whitespace.
    W292 - Add a single newline at the end of the file.
    W293 - Remove trailing whitespace on blank line.
    W391 - Remove trailing blank lines.
    W503 - Fix line break before binary operator.
    W504 - Fix line break after binary operator.
    W601 - Use "in" rather than "has_key()".
    W602 - Fix deprecated form of raising exception.
    W603 - Use "!=" instead of "<>".
    W604 - Use "repr()" instead of backticks.
    W605 - Fix invalid escape sequence 'x'.
    W690 - Fix various deprecated code (via lib2to3).

autopep8 also fixes some issues not found by pycodestyle_.

- Correct deprecated or non-idiomatic Python code (via ``lib2to3``). Use this
  for making Python 2.7 code more compatible with Python 3. (This is triggered
  if ``W690`` is enabled.)
- Normalize files with mixed line endings.
- Put a blank line between a class docstring and its first method
  declaration. (Enabled with ``E301``.)
- Remove blank lines between a function declaration and its docstring. (Enabled
  with ``E303``.)

autopep8 avoids fixing some issues found by pycodestyle_.

- ``E112``/``E113`` for non comments are reports of bad indentation that break
  syntax rules. These should not be modified at all.
- ``E265``, which refers to spacing after comment hash, is ignored if the
  comment looks like code. autopep8 avoids modifying these since they are not
  real comments. If you really want to get rid of the pycodestyle_ warning,
  consider just removing the commented-out code. (This can be automated via
  eradicate_.)

.. _eradicate: https://github.com/myint/eradicate


More advanced usage
===================

By default autopep8 only makes whitespace changes. Thus, by default, it does
not fix ``E711`` and ``E712``. (Changing ``x == None`` to ``x is None`` may
change the meaning of the program if ``x`` has its ``__eq__`` method
overridden.) Nor does it correct deprecated code ``W6``. To enable these
more aggressive fixes, use the ``--aggressive`` option::

    $ autopep8 --aggressive <filename>

Use multiple ``--aggressive`` to increase the aggressiveness level. For
example, ``E712`` requires aggressiveness level 2 (since ``x == True`` could be
changed to either ``x`` or ``x is True``, but autopep8 chooses the former).

``--aggressive`` will also shorten lines more aggressively. It will also remove
trailing whitespace more aggressively. (Usually, we don't touch trailing
whitespace in docstrings and other multiline strings. And to do even more
aggressive changes to docstrings, use docformatter_.)

.. _docformatter: https://github.com/myint/docformatter

To enable only a subset of the fixes, use the ``--select`` option. For example,
to fix various types of indentation issues::

    $ autopep8 --select=E1,W1 <filename>

Similarly, to just fix deprecated code::

    $ autopep8 --aggressive --select=W6 <filename>

The above is useful when trying to port a single code base to work with both
Python 2 and Python 3 at the same time.

If the file being fixed is large, you may want to enable verbose progress
messages::

    $ autopep8 -v <filename>

Passing in ``--experimental`` enables the following functionality:

- Shortens code lines by taking their length into account

::

    $ autopep8 --experimental <filename>

Disabling line-by-line
----------------------
It is possible to disable autopep8 until it is turned back on again in the
file, using ``autopep8: off`` and then re-enabling it with ``autopep8: on``.

.. code-block:: python

    # autopep8: off
    [
        [23, 23, 13, 43],
        [32, 34, 34, 34],
        [56, 34, 34, 11],
        [10, 10, 10, 10],
    ]
    # autopep8: on

``fmt: off`` and ``fmt: on`` are also valid.


Use as a module
===============

The simplest way of using autopep8 as a module is via the ``fix_code()``
function:

    >>> import autopep8
    >>> autopep8.fix_code('x= 123\n')
    'x = 123\n'

Or with options:

    >>> import autopep8
    >>> autopep8.fix_code('x.has_key(y)\n',
    ...                   options={'aggressive': 1})
    'y in x\n'
    >>> autopep8.fix_code('print( 123 )\n',
    ...                   options={'ignore': ['E']})
    'print( 123 )\n'
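
A minimal, illustrative sketch (not from the upstream README; the file
name is hypothetical) of applying ``fix_code()`` to a file on disk:

.. code-block:: python

    import autopep8

    # read the original source
    with open("example.py") as f:
        source = f.read()

    # fix_code() returns the reformatted source as a string
    fixed = autopep8.fix_code(source, options={"aggressive": 1})

    with open("example.py", "w") as f:
        f.write(fixed)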


Configuration
=============

By default, if ``$HOME/.config/pycodestyle`` (``~\.pycodestyle`` in a Windows
environment) exists, it will be used as the global configuration file.
Alternatively, you can specify the global configuration file with the
``--global-config`` option.

Also, if ``setup.cfg``, ``tox.ini``, ``.pep8`` and ``.flake8`` files exist
in the directory where the target file exists, they will be used as
configuration files.

``pep8``, ``pycodestyle``, and ``flake8`` can be used as a section.

configuration file example::

    [pycodestyle]
    max_line_length = 120
    ignore = E501

pyproject.toml
--------------

autopep8 can also use ``pyproject.toml``.
The section must be ``[tool.autopep8]``, and ``pyproject.toml`` takes precedence
over any other configuration files.

configuration file example::

    [tool.autopep8]
    max_line_length = 120
    ignore = "E501,W6"  # or ["E501", "W6"]
    in-place = true
    recursive = true
    aggressive = 3


Testing
=======

Test cases are in ``test/test_autopep8.py``. They can be run directly via
``python test/test_autopep8.py`` or via tox_. The latter is useful for
testing against multiple Python interpreters. (We currently test against
CPython versions 2.7, 3.6, 3.7 and 3.8. We also test against PyPy.)

.. _`tox`: https://pypi.org/project/tox/

Broad spectrum testing is available via ``test/acid.py``. This script runs
autopep8 against Python code and checks for correctness and completeness of the
code fixes. It can check that the bytecode remains identical.
``test/acid_pypi.py`` makes use of ``acid.py`` to test against the latest
released packages on PyPI.


Troubleshooting
===============

``pkg_resources.DistributionNotFound``
--------------------------------------

If you are using an ancient version of ``setuptools``, you might encounter
``pkg_resources.DistributionNotFound`` when trying to run ``autopep8``. Try
upgrading ``setuptools`` to work around this ``setuptools`` problem::

    $ pip install --upgrade setuptools

Use ``sudo`` if you are installing to the system.


Links
=====

* PyPI_
* GitHub_
* `Travis CI`_
* Coveralls_

.. _PyPI: https://pypi.org/project/autopep8/
.. _GitHub: https://github.com/hhatto/autopep8
.. _`Travis CI`: https://travis-ci.org/hhatto/autopep8
.. _`Coveralls`: https://coveralls.io/r/hhatto/autopep8
@ -0,0 +1,12 @@
../../Scripts/autopep8.exe,sha256=7IpamSCW-NAY8GsdHG6yZNsdmtNdQDMwFfPg9uR5K2g,107895
__pycache__/autopep8.cpython-310.pyc,,
autopep8-1.7.0.dist-info/AUTHORS.rst,sha256=tiTPsbzGl9dtXCMEWXbWSV1zan1M-BoWtiixs46GIWk,2003
autopep8-1.7.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
autopep8-1.7.0.dist-info/LICENSE,sha256=jR0COOSFQ0QZFMqwdB1N4-Bwobg2f3h69fIJr7YLCWo,1181
autopep8-1.7.0.dist-info/METADATA,sha256=uf9qENqUy_VnrVYXoyCkoLVjkcbTVut_FPcntXpbFQk,17302
autopep8-1.7.0.dist-info/RECORD,,
autopep8-1.7.0.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
autopep8-1.7.0.dist-info/WHEEL,sha256=kGT74LWyRUZrL4VgLh6_g12IeVl_9u9ZVhadrgXZUEY,110
autopep8-1.7.0.dist-info/entry_points.txt,sha256=zEduLXzN3YzTTZBwxjhEKW7PVLqSqVG8-ocCaCR3P4A,43
autopep8-1.7.0.dist-info/top_level.txt,sha256=s2x-di3QBwGxr7kd5xErt2pom8dsFRdINbmwsOEgLfU,9
autopep8.py,sha256=DS3qpM_YacgSCQWofj_6yRbkFr12T_IX1fS9HShhgYs,156300
@ -0,0 +1,6 @@
Wheel-Version: 1.0
Generator: bdist_wheel (0.34.2)
Root-Is-Purelib: true
Tag: py2-none-any
Tag: py3-none-any

@ -0,0 +1,2 @@
[console_scripts]
autopep8 = autopep8:main
@ -0,0 +1 @@
autopep8
File diff suppressed because it is too large
@ -0,0 +1 @@
pip
@ -0,0 +1,21 @@
This package contains a modified version of ca-bundle.crt:

ca-bundle.crt -- Bundle of CA Root Certificates

Certificate data from Mozilla as of: Thu Nov 3 19:04:19 2011#
This is a bundle of X.509 certificates of public Certificate Authorities
(CA). These were automatically extracted from Mozilla's root certificates
file (certdata.txt). This file can be found in the mozilla source tree:
https://hg.mozilla.org/mozilla-central/file/tip/security/nss/lib/ckfw/builtins/certdata.txt
It contains the certificates in PEM format and therefore
can be directly used with curl / libcurl / php_curl, or with
an Apache+mod_ssl webserver for SSL client authentication.
Just configure this file as the SSLCACertificateFile.#

***** BEGIN LICENSE BLOCK *****
This Source Code Form is subject to the terms of the Mozilla Public License,
v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain
one at http://mozilla.org/MPL/2.0/.

***** END LICENSE BLOCK *****
@(#) $RCSfile: certdata.txt,v $ $Revision: 1.80 $ $Date: 2011/11/03 15:11:58 $
@ -0,0 +1,83 @@
Metadata-Version: 2.1
Name: certifi
Version: 2022.9.14
Summary: Python package for providing Mozilla's CA Bundle.
Home-page: https://github.com/certifi/python-certifi
Author: Kenneth Reitz
Author-email: me@kennethreitz.com
License: MPL-2.0
Project-URL: Source, https://github.com/certifi/python-certifi
Platform: UNKNOWN
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)
Classifier: Natural Language :: English
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Requires-Python: >=3.6
License-File: LICENSE

Certifi: Python SSL Certificates
================================

Certifi provides Mozilla's carefully curated collection of Root Certificates for
validating the trustworthiness of SSL certificates while verifying the identity
of TLS hosts. It has been extracted from the `Requests`_ project.

Installation
------------

``certifi`` is available on PyPI. Simply install it with ``pip``::

    $ pip install certifi

Usage
-----

To reference the installed certificate authority (CA) bundle, you can use the
built-in function::

    >>> import certifi

    >>> certifi.where()
    '/usr/local/lib/python3.7/site-packages/certifi/cacert.pem'

Or from the command line::

    $ python -m certifi
    /usr/local/lib/python3.7/site-packages/certifi/cacert.pem
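
As a small illustrative sketch (not part of the upstream README), the
bundle path is typically handed to TLS-aware APIs, for example the
standard library's ``ssl`` module::

    import ssl
    import certifi

    # build a client-side SSL context that trusts the certifi bundle
    context = ssl.create_default_context(cafile=certifi.where())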

Enjoy!

1024-bit Root Certificates
~~~~~~~~~~~~~~~~~~~~~~~~~~

Browsers and certificate authorities have concluded that 1024-bit keys are
unacceptably weak for certificates, particularly root certificates. For this
reason, Mozilla has removed any weak (i.e. 1024-bit key) certificate from its
bundle, replacing it with an equivalent strong (i.e. 2048-bit or greater key)
certificate from the same CA. Because Mozilla removed these certificates from
its bundle, ``certifi`` removed them as well.

In previous versions, ``certifi`` provided the ``certifi.old_where()`` function
to intentionally re-add the 1024-bit roots back into your bundle. This was not
recommended in production and therefore was removed at the end of 2018.

.. _`Requests`: https://requests.readthedocs.io/en/master/

Addition/Removal of Certificates
--------------------------------

Certifi does not support any addition/removal or other modification of the
CA trust store content. This project is intended to provide a reliable and
highly portable root of trust to Python deployments. Look to upstream projects
for methods to use alternate trust.

@ -0,0 +1,14 @@
certifi-2022.9.14.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
certifi-2022.9.14.dist-info/LICENSE,sha256=oC9sY4-fuE0G93ZMOrCF2K9-2luTwWbaVDEkeQd8b7A,1052
certifi-2022.9.14.dist-info/METADATA,sha256=lsqN8y6OLCwM93kA5dVNxpg5FiKRWjBaoLKoVVuZ6rQ,2911
certifi-2022.9.14.dist-info/RECORD,,
certifi-2022.9.14.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
certifi-2022.9.14.dist-info/top_level.txt,sha256=KMu4vUCfsjLrkPbSNdgdekS-pVJzBAJFO__nI8NF6-U,8
certifi/__init__.py,sha256=Fev1PsF1er2EAJBvBG2ImLp0J0Z8XTjCKhX-WWLBaAE,94
certifi/__main__.py,sha256=xBBoj905TUWBLRGANOcf7oi6e-3dMP4cEoG9OyMs11g,243
certifi/__pycache__/__init__.cpython-310.pyc,,
certifi/__pycache__/__main__.cpython-310.pyc,,
certifi/__pycache__/core.cpython-310.pyc,,
certifi/cacert.pem,sha256=_T2XVUva7T81lnw-Lw4v8Sp0ZuyD0FCWfHkbq2BZJJk,289012
certifi/core.py,sha256=lhewz0zFb2b4ULsQurElmloYwQoecjWzPqY67P8T7iM,4219
certifi/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@ -0,0 +1,5 @@
Wheel-Version: 1.0
Generator: bdist_wheel (0.37.0)
Root-Is-Purelib: true
Tag: py3-none-any

@ -0,0 +1 @@
certifi
@ -0,0 +1,4 @@
from .core import contents, where

__all__ = ["contents", "where"]
__version__ = "2022.09.14"
@ -0,0 +1,12 @@
import argparse

from certifi import contents, where

parser = argparse.ArgumentParser()
parser.add_argument("-c", "--contents", action="store_true")
args = parser.parse_args()

if args.contents:
    print(contents())
else:
    print(where())
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
@ -0,0 +1,108 @@
"""
certifi.py
~~~~~~~~~~

This module returns the installation location of cacert.pem or its contents.
"""
import sys


if sys.version_info >= (3, 11):

    from importlib.resources import as_file, files

    _CACERT_CTX = None
    _CACERT_PATH = None

    def where() -> str:
        # This is slightly terrible, but we want to delay extracting the file
        # in cases where we're inside of a zipimport situation until someone
        # actually calls where(), but we don't want to re-extract the file
        # on every call of where(), so we'll do it once then store it in a
        # global variable.
        global _CACERT_CTX
        global _CACERT_PATH
        if _CACERT_PATH is None:
            # This is slightly janky, the importlib.resources API wants you to
            # manage the cleanup of this file, so it doesn't actually return a
            # path, it returns a context manager that will give you the path
            # when you enter it and will do any cleanup when you leave it. In
            # the common case of not needing a temporary file, it will just
            # return the file system location and the __exit__() is a no-op.
            #
            # We also have to hold onto the actual context manager, because
            # it will do the cleanup whenever it gets garbage collected, so
            # we will also store that at the global level as well.
            _CACERT_CTX = as_file(files("certifi").joinpath("cacert.pem"))
            _CACERT_PATH = str(_CACERT_CTX.__enter__())

        return _CACERT_PATH

    def contents() -> str:
        return files("certifi").joinpath("cacert.pem").read_text(encoding="ascii")

elif sys.version_info >= (3, 7):

    from importlib.resources import path as get_path, read_text

    _CACERT_CTX = None
    _CACERT_PATH = None

    def where() -> str:
        # This is slightly terrible, but we want to delay extracting the
        # file in cases where we're inside of a zipimport situation until
        # someone actually calls where(), but we don't want to re-extract
        # the file on every call of where(), so we'll do it once then store
        # it in a global variable.
        global _CACERT_CTX
        global _CACERT_PATH
        if _CACERT_PATH is None:
            # This is slightly janky, the importlib.resources API wants you
            # to manage the cleanup of this file, so it doesn't actually
            # return a path, it returns a context manager that will give
            # you the path when you enter it and will do any cleanup when
            # you leave it. In the common case of not needing a temporary
            # file, it will just return the file system location and the
            # __exit__() is a no-op.
            #
            # We also have to hold onto the actual context manager, because
            # it will do the cleanup whenever it gets garbage collected, so
            # we will also store that at the global level as well.
            _CACERT_CTX = get_path("certifi", "cacert.pem")
            _CACERT_PATH = str(_CACERT_CTX.__enter__())

        return _CACERT_PATH

    def contents() -> str:
        return read_text("certifi", "cacert.pem", encoding="ascii")

else:
    import os
    import types
    from typing import Union

    Package = Union[types.ModuleType, str]
    Resource = Union[str, "os.PathLike"]

    # This fallback will work for Python versions prior to 3.7 that lack the
    # importlib.resources module but relies on the existing `where` function
    # so won't address issues with environments like PyOxidizer that don't set
    # __file__ on modules.
    def read_text(
        package: Package,
        resource: Resource,
        encoding: str = 'utf-8',
        errors: str = 'strict'
    ) -> str:
        with open(where(), encoding=encoding) as data:
            return data.read()

    # If we don't have importlib.resources, then we will just do the old logic
    # of assuming we're on the filesystem and munge the path directly.
    def where() -> str:
        f = os.path.dirname(__file__)

        return os.path.join(f, "cacert.pem")

    def contents() -> str:
        return read_text("certifi", "cacert.pem", encoding="ascii")
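The comments in `core.py` above describe a lazy-extraction pattern that is worth noting on its own. A minimal sketch of the same idea, assuming the `certifi` package with its bundled `cacert.pem` (which is exactly what this diff adds):

```python
from importlib.resources import as_file, files

_ctx = None   # keep the context manager alive so cleanup is deferred
_path = None  # cache the resolved path across calls

def data_path() -> str:
    global _ctx, _path
    if _path is None:
        # as_file() yields a real filesystem path; it only extracts to a
        # temporary file when the package is imported from a zip.
        _ctx = as_file(files("certifi").joinpath("cacert.pem"))
        _path = str(_ctx.__enter__())
    return _path
```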
@ -0,0 +1 @@
pip
@ -0,0 +1,21 @@
MIT License

Copyright (c) 2019 TAHRI Ahmed R.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@ -0,0 +1,269 @@
Metadata-Version: 2.1
Name: charset-normalizer
Version: 2.1.1
Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
Home-page: https://github.com/ousret/charset_normalizer
Author: Ahmed TAHRI @Ousret
Author-email: ahmed.tahri@cloudnursery.dev
License: MIT
Project-URL: Bug Reports, https://github.com/Ousret/charset_normalizer/issues
Project-URL: Documentation, https://charset-normalizer.readthedocs.io/en/latest
Keywords: encoding,i18n,txt,text,charset,charset-detector,normalization,unicode,chardet
Classifier: Development Status :: 5 - Production/Stable
Classifier: License :: OSI Approved :: MIT License
Classifier: Intended Audience :: Developers
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Topic :: Text Processing :: Linguistic
Classifier: Topic :: Utilities
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Typing :: Typed
Requires-Python: >=3.6.0
Description-Content-Type: text/markdown
License-File: LICENSE
Provides-Extra: unicode_backport
Requires-Dist: unicodedata2 ; extra == 'unicode_backport'

<h1 align="center">Charset Detection, for Everyone 👋 <a href="https://twitter.com/intent/tweet?text=The%20Real%20First%20Universal%20Charset%20%26%20Language%20Detector&url=https://www.github.com/Ousret/charset_normalizer&hashtags=python,encoding,chardet,developers"><img src="https://img.shields.io/twitter/url/http/shields.io.svg?style=social"/></a></h1>

<p align="center">
  <sup>The Real First Universal Charset Detector</sup><br>
  <a href="https://pypi.org/project/charset-normalizer">
    <img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
  </a>
  <a href="https://codecov.io/gh/Ousret/charset_normalizer">
    <img src="https://codecov.io/gh/Ousret/charset_normalizer/branch/master/graph/badge.svg" />
  </a>
  <a href="https://pepy.tech/project/charset-normalizer/">
    <img alt="Download Count Total" src="https://pepy.tech/badge/charset-normalizer/month" />
  </a>
</p>

> A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
> I'm trying to resolve the issue by taking a new approach.
> All IANA character set names for which the Python core library provides codecs are supported.

<p align="center">
  >>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
</p>

This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.

| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
| ------------- | :-------------: | :------------------: | :------------------: |
| `Fast` | ❌<br> | ✅<br> | ✅ <br> |
| `Universal**` | ❌ | ✅ | ❌ |
| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
| `Reliable` **with** distinguishable standards | ✅ | ✅ | ✅ |
| `License` | LGPL-2.1<br>_restrictive_ | MIT | MPL-1.1<br>_restrictive_ |
| `Native Python` | ✅ | ✅ | ❌ |
| `Detect spoken language` | ❌ | ✅ | N/A |
| `UnicodeDecodeError Safety` | ❌ | ✅ | ❌ |
| `Whl Size` | 193.6 kB | 39.5 kB | ~200 kB |
| `Supported Encoding` | 33 | :tada: [93](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |

<p align="center">
<img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>

*\*\* : They are clearly using specific code for a specific encoding, even if it covers most of the commonly used ones*<br>
Did you get here because of the logs? See [https://charset-normalizer.readthedocs.io/en/latest/user/miscellaneous.html](https://charset-normalizer.readthedocs.io/en/latest/user/miscellaneous.html)

## ⭐ Your support

*Fork it, test it, star it, submit your ideas! We do listen.*

## ⚡ Performance

This package offers better performance than its counterpart Chardet. Here are some numbers.

| Package | Accuracy | Mean per file (ms) | File per sec (est) |
| ------------- | :-------------: | :------------------: | :------------------: |
| [chardet](https://github.com/chardet/chardet) | 86 % | 200 ms | 5 file/sec |
| charset-normalizer | **98 %** | **39 ms** | 26 file/sec |

| Package | 99th percentile | 95th percentile | 50th percentile |
| ------------- | :-------------: | :------------------: | :------------------: |
| [chardet](https://github.com/chardet/chardet) | 1200 ms | 287 ms | 23 ms |
| charset-normalizer | 400 ms | 200 ms | 15 ms |

Chardet's performance on larger files (1MB+) is very poor. Expect a huge difference on large payloads.

> Stats are generated using 400+ files with default parameters. For more details on the files used, see the GHA workflows.
> And yes, these results might change at any time. The dataset can be updated to include more files.
> The actual delays depend heavily on your CPU capabilities. The factors should remain the same.
> Keep in mind that the stats are generous and that Chardet's accuracy vs ours is measured using Chardet's initial capability
> (e.g. supported encodings). Challenge them if you want.

[cchardet](https://github.com/PyYoshi/cChardet) is a non-native (C++ binding), unmaintained, faster alternative with
better accuracy than chardet but lower than this package. If speed is the most important factor, you should try it.

## ✨ Installation

Using PyPi for the latest stable release:
```sh
pip install charset-normalizer -U
```

If you want a more up-to-date `unicodedata` than the one available in your Python setup:
```sh
pip install charset-normalizer[unicode_backport] -U
```

## 🚀 Basic Usage

### CLI
This package comes with a CLI.

```
usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
                  file [file ...]

The Real First Universal Charset Detector. Discover originating encoding used
on text file. Normalize text to unicode.

positional arguments:
  files                 File(s) to be analysed

optional arguments:
  -h, --help            show this help message and exit
  -v, --verbose         Display complementary information about file if any.
                        Stdout will contain logs about the detection process.
  -a, --with-alternative
                        Output complementary possibilities if any. Top-level
                        JSON WILL be a list.
  -n, --normalize       Permit to normalize input file. If not set, program
                        does not write anything.
  -m, --minimal         Only output the charset detected to STDOUT. Disabling
                        JSON output.
  -r, --replace         Replace file when trying to normalize it instead of
                        creating a new one.
  -f, --force           Replace file without asking if you are sure, use this
                        flag with caution.
  -t THRESHOLD, --threshold THRESHOLD
                        Define a custom maximum amount of chaos allowed in
                        decoded content. 0. <= chaos <= 1.
  --version             Show version information and exit.
```

```bash
normalizer ./data/sample.1.fr.srt
```

:tada: Since version 1.4.0 the CLI produces an easily usable stdout result in JSON format.

```json
{
    "path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
    "encoding": "cp1252",
    "encoding_aliases": [
        "1252",
        "windows_1252"
    ],
    "alternative_encodings": [
        "cp1254",
        "cp1256",
        "cp1258",
        "iso8859_14",
        "iso8859_15",
        "iso8859_16",
        "iso8859_3",
        "iso8859_9",
        "latin_1",
        "mbcs"
    ],
    "language": "French",
    "alphabets": [
        "Basic Latin",
        "Latin-1 Supplement"
    ],
    "has_sig_or_bom": false,
    "chaos": 0.149,
    "coherence": 97.152,
    "unicode_path": null,
    "is_preferred": true
}
```

### Python
*Just print out normalized text*
```python
from charset_normalizer import from_path

results = from_path('./my_subtitle.srt')

print(str(results.best()))
```

*Normalize any text file*
```python
from charset_normalizer import normalize
try:
    normalize('./my_subtitle.srt') # should write to disk my_subtitle-***.srt
except IOError as e:
    print('Sadly, we are unable to perform charset normalization.', str(e))
```

*Upgrade your code without effort*
```python
from charset_normalizer import detect
```

The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.

See the docs for advanced usage : [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)
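As a hedged illustration of that drop-in compatibility (the dict keys follow chardet's convention; exact values are illustrative):

```python
from charset_normalizer import detect

payload = "Bonjour, le café est prêt !".encode("cp1252")

result = detect(payload)
# A chardet-style dict, e.g. {'encoding': ..., 'language': ..., 'confidence': ...}
print(result["encoding"], result["confidence"])
```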
## 😇 Why

When I started using Chardet, I noticed that it did not suit my expectations, and I wanted to propose a
reliable alternative using a completely different method. Also! I never back down from a good challenge!

I **don't care** about the **originating charset** encoding, because **two different tables** can
produce **two identical rendered strings.**
What I want is to get readable text, the best I can.

In a way, **I'm brute-forcing text decoding.** How cool is that? 😎

Don't confuse the **ftfy** package with charset-normalizer or chardet. ftfy's goal is to repair Unicode strings, whereas charset-normalizer converts a raw file in an unknown encoding to Unicode.

## 🍰 How

- Discard all charset encoding tables that could not fit the binary content.
- Measure the chaos, or mess, once opened (by chunks) with a corresponding charset encoding.
- Extract the matches with the lowest mess detected.
- Additionally, we measure coherence / probe for a language.

**Wait a minute**, what are chaos/mess and coherence according to **YOU**?

*Chaos :* I opened hundreds of text files, **written by humans**, with the wrong encoding table. **I observed**, then
**I established** some ground rules about **what is obvious** when **it seems like** a mess.
I know that my interpretation of what is chaotic is very subjective; feel free to contribute in order to
improve or rewrite it.

*Coherence :* For each language on earth, we have computed ranked letter-appearance occurrences (as best we can). So I thought
that intel was worth something here. I use those records against decoded text to check if I can detect intelligent design. (A minimal sketch of the brute-force idea follows below.)
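As referenced above, here is a minimal toy sketch of that brute-force idea: try candidate codecs, discard hard failures, and keep the decode with the least "mess". This is an illustration only, not charset-normalizer's actual chaos/coherence scoring.

```python
# Toy brute-force decoder: illustrative only.
CANDIDATES = ["utf_8", "cp1252", "latin_1", "utf_16"]

def toy_detect(payload: bytes) -> str:
    scored = []
    for codec in CANDIDATES:
        try:
            text = payload.decode(codec)
        except (UnicodeDecodeError, LookupError):
            continue  # this table cannot fit the bytes at all
        # Crude "mess" score: share of non-printable characters.
        bad = sum(not ch.isprintable() and ch not in "\r\n\t" for ch in text)
        scored.append((bad / max(len(text), 1), codec))
    if not scored:
        raise ValueError("no candidate codec fits")
    return min(scored)[1]  # the lowest mess wins (ties break alphabetically)

print(toy_detect("Bonjour, le café est prêt !".encode("cp1252")))  # cp1252
```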
## ⚡ Known limitations

- Language detection is unreliable when the text contains two or more languages sharing identical letters. (e.g. HTML (English tags) + Turkish content (sharing Latin characters))
- Every charset detector heavily depends on sufficient content. In common cases, do not bother running detection on very tiny content.

## 👤 Contributing

Contributions, issues and feature requests are very much welcome.<br />
Feel free to check the [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.

## 📝 License

Copyright © 2019 [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.

Characters frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)
@ -0,0 +1,33 @@
../../Scripts/normalizer.exe,sha256=QtFXLYpezh5QF_IU1pywlZ_21fXMu3FXcfwC8TrJ0Lc,106396
charset_normalizer-2.1.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
charset_normalizer-2.1.1.dist-info/LICENSE,sha256=6zGgxaT7Cbik4yBV0lweX5w1iidS_vPNcgIT0cz-4kE,1070
charset_normalizer-2.1.1.dist-info/METADATA,sha256=C99l12g4d1E9_UiW-mqPCWx7v2M_lYGWxy1GTOjXSsA,11942
charset_normalizer-2.1.1.dist-info/RECORD,,
charset_normalizer-2.1.1.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
charset_normalizer-2.1.1.dist-info/entry_points.txt,sha256=uYo8aIGLWv8YgWfSna5HnfY_En4pkF1w4bgawNAXzP0,76
charset_normalizer-2.1.1.dist-info/top_level.txt,sha256=7ASyzePr8_xuZWJsnqJjIBtyV8vhEo0wBCv1MPRRi3Q,19
charset_normalizer/__init__.py,sha256=jGhhf1IcOgCpZsr593E9fPvjWKnflVqHe_LwkOJjInU,1790
charset_normalizer/__pycache__/__init__.cpython-310.pyc,,
charset_normalizer/__pycache__/api.cpython-310.pyc,,
charset_normalizer/__pycache__/cd.cpython-310.pyc,,
charset_normalizer/__pycache__/constant.cpython-310.pyc,,
charset_normalizer/__pycache__/legacy.cpython-310.pyc,,
charset_normalizer/__pycache__/md.cpython-310.pyc,,
charset_normalizer/__pycache__/models.cpython-310.pyc,,
charset_normalizer/__pycache__/utils.cpython-310.pyc,,
charset_normalizer/__pycache__/version.cpython-310.pyc,,
charset_normalizer/api.py,sha256=euVPmjAMbjpqhEHPjfKtyy1mK52U0TOUBUQgM_Qy6eE,19191
charset_normalizer/assets/__init__.py,sha256=r7aakPaRIc2FFG2mw2V8NOTvkl25_euKZ3wPf5SAVa4,15222
charset_normalizer/assets/__pycache__/__init__.cpython-310.pyc,,
charset_normalizer/cd.py,sha256=Pxdkbn4cy0iZF42KTb1FiWIqqKobuz_fDjGwc6JMNBc,10811
charset_normalizer/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
charset_normalizer/cli/__pycache__/__init__.cpython-310.pyc,,
charset_normalizer/cli/__pycache__/normalizer.cpython-310.pyc,,
charset_normalizer/cli/normalizer.py,sha256=FmD1RXeMpRBg_mjR0MaJhNUpM2qZ8wz2neAE7AayBeg,9521
charset_normalizer/constant.py,sha256=NgU-pY8JH2a9lkVT8oKwAFmIUYNKOuSBwZgF9MrlNCM,19157
charset_normalizer/legacy.py,sha256=XKeZOts_HdYQU_Jb3C9ZfOjY2CiUL132k9_nXer8gig,3384
charset_normalizer/md.py,sha256=pZP8IVpSC82D8INA9Tf_y0ijJSRI-UIncZvLdfTWEd4,17642
charset_normalizer/models.py,sha256=i68YdlSLTEI3EEBVXq8TLNAbyyjrLC2OWszc-OBAk9I,13167
charset_normalizer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
charset_normalizer/utils.py,sha256=ykOznhcAeH-ODLBWJuI7t1nbwa1SAfN_bDYTCJGyh4U,11771
charset_normalizer/version.py,sha256=_eh2MA3qS__IajlePQxKBmlw6zaBDvPYlLdEgxgIojw,79
@ -0,0 +1,5 @@
Wheel-Version: 1.0
Generator: bdist_wheel (0.37.1)
Root-Is-Purelib: true
Tag: py3-none-any
@ -0,0 +1,2 @@
[console_scripts]
normalizer = charset_normalizer.cli.normalizer:cli_detect
@ -0,0 +1 @@
charset_normalizer
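For reference, the `[console_scripts]` entry above is what makes pip generate the `normalizer` executable listed in the RECORD; the target function can also be called directly. A sketch (the file name is hypothetical):

```python
from charset_normalizer.cli.normalizer import cli_detect

# Equivalent to running `normalizer sample.txt` from a shell;
# returns 0 when everything is fine, anything else signals trouble.
exit_code = cli_detect(["sample.txt"])
```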
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,584 @@
import logging
import warnings
from os import PathLike
from os.path import basename, splitext
from typing import Any, BinaryIO, List, Optional, Set

from .cd import (
    coherence_ratio,
    encoding_languages,
    mb_encoding_languages,
    merge_coherence_ratios,
)
from .constant import IANA_SUPPORTED, TOO_BIG_SEQUENCE, TOO_SMALL_SEQUENCE, TRACE
from .md import mess_ratio
from .models import CharsetMatch, CharsetMatches
from .utils import (
    any_specified_encoding,
    cut_sequence_chunks,
    iana_name,
    identify_sig_or_bom,
    is_cp_similar,
    is_multi_byte_encoding,
    should_strip_sig_or_bom,
)

# Will most likely be controversial
# logging.addLevelName(TRACE, "TRACE")
logger = logging.getLogger("charset_normalizer")
explain_handler = logging.StreamHandler()
explain_handler.setFormatter(
    logging.Formatter("%(asctime)s | %(levelname)s | %(message)s")
)


def from_bytes(
    sequences: bytes,
    steps: int = 5,
    chunk_size: int = 512,
    threshold: float = 0.2,
    cp_isolation: Optional[List[str]] = None,
    cp_exclusion: Optional[List[str]] = None,
    preemptive_behaviour: bool = True,
    explain: bool = False,
) -> CharsetMatches:
    """
    Given a raw bytes sequence, return the best possible charsets usable to render str objects.
    If there is no result, it is a strong indicator that the source is binary/not text.
    By default, the process will extract 5 blocks of 512 bytes each to assess the mess and coherence of a given sequence,
    and will give up on a particular code page after 20% of measured mess. Those criteria are customizable at will.

    The preemptive behaviour DOES NOT replace the traditional detection workflow; it prioritizes a particular code page
    but never takes it for granted. It can improve performance.

    You may want to focus your attention on some code pages and/or exclude others; use cp_isolation and cp_exclusion for that
    purpose.

    This function will strip the SIG from the payload/sequence every time except for UTF-16 and UTF-32.
    By default the library does not set up any handler other than the NullHandler. If you set the 'explain'
    toggle to True it will alter the logger configuration to add a StreamHandler that is suitable for debugging.
    A custom logging format and handler can be set manually.
    """

    if not isinstance(sequences, (bytearray, bytes)):
        raise TypeError(
            "Expected object of type bytes or bytearray, got: {0}".format(
                type(sequences)
            )
        )

    if explain:
        previous_logger_level: int = logger.level
        logger.addHandler(explain_handler)
        logger.setLevel(TRACE)

    length: int = len(sequences)

    if length == 0:
        logger.debug("Encoding detection on empty bytes, assuming utf_8 intention.")
        if explain:
            logger.removeHandler(explain_handler)
            logger.setLevel(previous_logger_level or logging.WARNING)
        return CharsetMatches([CharsetMatch(sequences, "utf_8", 0.0, False, [], "")])

    if cp_isolation is not None:
        logger.log(
            TRACE,
            "cp_isolation is set. use this flag for debugging purpose. "
            "limited list of encoding allowed : %s.",
            ", ".join(cp_isolation),
        )
        cp_isolation = [iana_name(cp, False) for cp in cp_isolation]
    else:
        cp_isolation = []

    if cp_exclusion is not None:
        logger.log(
            TRACE,
            "cp_exclusion is set. use this flag for debugging purpose. "
            "limited list of encoding excluded : %s.",
            ", ".join(cp_exclusion),
        )
        cp_exclusion = [iana_name(cp, False) for cp in cp_exclusion]
    else:
        cp_exclusion = []

    if length <= (chunk_size * steps):
        logger.log(
            TRACE,
            "override steps (%i) and chunk_size (%i) as content does not fit (%i byte(s) given) parameters.",
            steps,
            chunk_size,
            length,
        )
        steps = 1
        chunk_size = length

    if steps > 1 and length / steps < chunk_size:
        chunk_size = int(length / steps)

    is_too_small_sequence: bool = len(sequences) < TOO_SMALL_SEQUENCE
    is_too_large_sequence: bool = len(sequences) >= TOO_BIG_SEQUENCE

    if is_too_small_sequence:
        logger.log(
            TRACE,
            "Trying to detect encoding from a tiny portion of ({}) byte(s).".format(
                length
            ),
        )
    elif is_too_large_sequence:
        logger.log(
            TRACE,
            "Using lazy str decoding because the payload is quite large, ({}) byte(s).".format(
                length
            ),
        )

    prioritized_encodings: List[str] = []

    specified_encoding: Optional[str] = (
        any_specified_encoding(sequences) if preemptive_behaviour else None
    )

    if specified_encoding is not None:
        prioritized_encodings.append(specified_encoding)
        logger.log(
            TRACE,
            "Detected declarative mark in sequence. Priority +1 given for %s.",
            specified_encoding,
        )

    tested: Set[str] = set()
    tested_but_hard_failure: List[str] = []
    tested_but_soft_failure: List[str] = []

    fallback_ascii: Optional[CharsetMatch] = None
    fallback_u8: Optional[CharsetMatch] = None
    fallback_specified: Optional[CharsetMatch] = None

    results: CharsetMatches = CharsetMatches()

    sig_encoding, sig_payload = identify_sig_or_bom(sequences)

    if sig_encoding is not None:
        prioritized_encodings.append(sig_encoding)
        logger.log(
            TRACE,
            "Detected a SIG or BOM mark on first %i byte(s). Priority +1 given for %s.",
            len(sig_payload),
            sig_encoding,
        )

    prioritized_encodings.append("ascii")

    if "utf_8" not in prioritized_encodings:
        prioritized_encodings.append("utf_8")

    for encoding_iana in prioritized_encodings + IANA_SUPPORTED:

        if cp_isolation and encoding_iana not in cp_isolation:
            continue

        if cp_exclusion and encoding_iana in cp_exclusion:
            continue

        if encoding_iana in tested:
            continue

        tested.add(encoding_iana)

        decoded_payload: Optional[str] = None
        bom_or_sig_available: bool = sig_encoding == encoding_iana
        strip_sig_or_bom: bool = bom_or_sig_available and should_strip_sig_or_bom(
            encoding_iana
        )

        if encoding_iana in {"utf_16", "utf_32"} and not bom_or_sig_available:
            logger.log(
                TRACE,
                "Encoding %s wont be tested as-is because it require a BOM. Will try some sub-encoder LE/BE.",
                encoding_iana,
            )
            continue

        try:
            is_multi_byte_decoder: bool = is_multi_byte_encoding(encoding_iana)
        except (ModuleNotFoundError, ImportError):
            logger.log(
                TRACE,
                "Encoding %s does not provide an IncrementalDecoder",
                encoding_iana,
            )
            continue

        try:
            if is_too_large_sequence and is_multi_byte_decoder is False:
                str(
                    sequences[: int(50e4)]
                    if strip_sig_or_bom is False
                    else sequences[len(sig_payload) : int(50e4)],
                    encoding=encoding_iana,
                )
            else:
                decoded_payload = str(
                    sequences
                    if strip_sig_or_bom is False
                    else sequences[len(sig_payload) :],
                    encoding=encoding_iana,
                )
        except (UnicodeDecodeError, LookupError) as e:
            if not isinstance(e, LookupError):
                logger.log(
                    TRACE,
                    "Code page %s does not fit given bytes sequence at ALL. %s",
                    encoding_iana,
                    str(e),
                )
            tested_but_hard_failure.append(encoding_iana)
            continue

        similar_soft_failure_test: bool = False

        for encoding_soft_failed in tested_but_soft_failure:
            if is_cp_similar(encoding_iana, encoding_soft_failed):
                similar_soft_failure_test = True
                break

        if similar_soft_failure_test:
            logger.log(
                TRACE,
                "%s is deemed too similar to code page %s and was consider unsuited already. Continuing!",
                encoding_iana,
                encoding_soft_failed,
            )
            continue

        r_ = range(
            0 if not bom_or_sig_available else len(sig_payload),
            length,
            int(length / steps),
        )

        multi_byte_bonus: bool = (
            is_multi_byte_decoder
            and decoded_payload is not None
            and len(decoded_payload) < length
        )

        if multi_byte_bonus:
            logger.log(
                TRACE,
                "Code page %s is a multi byte encoding table and it appear that at least one character "
                "was encoded using n-bytes.",
                encoding_iana,
            )

        max_chunk_gave_up: int = int(len(r_) / 4)

        max_chunk_gave_up = max(max_chunk_gave_up, 2)
        early_stop_count: int = 0
        lazy_str_hard_failure = False

        md_chunks: List[str] = []
        md_ratios = []

        try:
            for chunk in cut_sequence_chunks(
                sequences,
                encoding_iana,
                r_,
                chunk_size,
                bom_or_sig_available,
                strip_sig_or_bom,
                sig_payload,
                is_multi_byte_decoder,
                decoded_payload,
            ):
                md_chunks.append(chunk)

                md_ratios.append(mess_ratio(chunk, threshold))

                if md_ratios[-1] >= threshold:
                    early_stop_count += 1

                if (early_stop_count >= max_chunk_gave_up) or (
                    bom_or_sig_available and strip_sig_or_bom is False
                ):
                    break
        except UnicodeDecodeError as e:  # Lazy str loading may have missed something there
            logger.log(
                TRACE,
                "LazyStr Loading: After MD chunk decode, code page %s does not fit given bytes sequence at ALL. %s",
                encoding_iana,
                str(e),
            )
            early_stop_count = max_chunk_gave_up
            lazy_str_hard_failure = True

        # We might want to check the sequence again with the whole content,
        # but only if the initial MD tests pass.
        if (
            not lazy_str_hard_failure
            and is_too_large_sequence
            and not is_multi_byte_decoder
        ):
            try:
                sequences[int(50e3) :].decode(encoding_iana, errors="strict")
            except UnicodeDecodeError as e:
                logger.log(
                    TRACE,
                    "LazyStr Loading: After final lookup, code page %s does not fit given bytes sequence at ALL. %s",
                    encoding_iana,
                    str(e),
                )
                tested_but_hard_failure.append(encoding_iana)
                continue

        mean_mess_ratio: float = sum(md_ratios) / len(md_ratios) if md_ratios else 0.0
        if mean_mess_ratio >= threshold or early_stop_count >= max_chunk_gave_up:
            tested_but_soft_failure.append(encoding_iana)
            logger.log(
                TRACE,
                "%s was excluded because of initial chaos probing. Gave up %i time(s). "
                "Computed mean chaos is %f %%.",
                encoding_iana,
                early_stop_count,
                round(mean_mess_ratio * 100, ndigits=3),
            )
            # Preparing those fallbacks in case we got nothing.
            if (
                encoding_iana in ["ascii", "utf_8", specified_encoding]
                and not lazy_str_hard_failure
            ):
                fallback_entry = CharsetMatch(
                    sequences, encoding_iana, threshold, False, [], decoded_payload
                )
                if encoding_iana == specified_encoding:
                    fallback_specified = fallback_entry
                elif encoding_iana == "ascii":
                    fallback_ascii = fallback_entry
                else:
                    fallback_u8 = fallback_entry
            continue

        logger.log(
            TRACE,
            "%s passed initial chaos probing. Mean measured chaos is %f %%",
            encoding_iana,
            round(mean_mess_ratio * 100, ndigits=3),
        )

        if not is_multi_byte_decoder:
            target_languages: List[str] = encoding_languages(encoding_iana)
        else:
            target_languages = mb_encoding_languages(encoding_iana)

        if target_languages:
            logger.log(
                TRACE,
                "{} should target any language(s) of {}".format(
                    encoding_iana, str(target_languages)
                ),
            )

        cd_ratios = []

        # We shall skip the CD when it's about ASCII;
        # most of the time it's not relevant to run "language-detection" on it.
        if encoding_iana != "ascii":
            for chunk in md_chunks:
                chunk_languages = coherence_ratio(
                    chunk, 0.1, ",".join(target_languages) if target_languages else None
                )

                cd_ratios.append(chunk_languages)

        cd_ratios_merged = merge_coherence_ratios(cd_ratios)

        if cd_ratios_merged:
            logger.log(
                TRACE,
                "We detected language {} using {}".format(
                    cd_ratios_merged, encoding_iana
                ),
            )

        results.append(
            CharsetMatch(
                sequences,
                encoding_iana,
                mean_mess_ratio,
                bom_or_sig_available,
                cd_ratios_merged,
                decoded_payload,
            )
        )

        if (
            encoding_iana in [specified_encoding, "ascii", "utf_8"]
            and mean_mess_ratio < 0.1
        ):
            logger.debug(
                "Encoding detection: %s is most likely the one.", encoding_iana
            )
            if explain:
                logger.removeHandler(explain_handler)
                logger.setLevel(previous_logger_level)
            return CharsetMatches([results[encoding_iana]])

        if encoding_iana == sig_encoding:
            logger.debug(
                "Encoding detection: %s is most likely the one as we detected a BOM or SIG within "
                "the beginning of the sequence.",
                encoding_iana,
            )
            if explain:
                logger.removeHandler(explain_handler)
                logger.setLevel(previous_logger_level)
            return CharsetMatches([results[encoding_iana]])

    if len(results) == 0:
        if fallback_u8 or fallback_ascii or fallback_specified:
            logger.log(
                TRACE,
                "Nothing got out of the detection process. Using ASCII/UTF-8/Specified fallback.",
            )

        if fallback_specified:
            logger.debug(
                "Encoding detection: %s will be used as a fallback match",
                fallback_specified.encoding,
            )
            results.append(fallback_specified)
        elif (
            (fallback_u8 and fallback_ascii is None)
            or (
                fallback_u8
                and fallback_ascii
                and fallback_u8.fingerprint != fallback_ascii.fingerprint
            )
            or (fallback_u8 is not None)
        ):
            logger.debug("Encoding detection: utf_8 will be used as a fallback match")
            results.append(fallback_u8)
        elif fallback_ascii:
            logger.debug("Encoding detection: ascii will be used as a fallback match")
            results.append(fallback_ascii)

    if results:
        logger.debug(
            "Encoding detection: Found %s as plausible (best-candidate) for content. With %i alternatives.",
            results.best().encoding,  # type: ignore
            len(results) - 1,
        )
    else:
        logger.debug("Encoding detection: Unable to determine any suitable charset.")

    if explain:
        logger.removeHandler(explain_handler)
        logger.setLevel(previous_logger_level)

    return results


def from_fp(
    fp: BinaryIO,
    steps: int = 5,
    chunk_size: int = 512,
    threshold: float = 0.20,
    cp_isolation: Optional[List[str]] = None,
    cp_exclusion: Optional[List[str]] = None,
    preemptive_behaviour: bool = True,
    explain: bool = False,
) -> CharsetMatches:
    """
    Same as from_bytes but using a file pointer that is already ready.
    Will not close the file pointer.
    """
    return from_bytes(
        fp.read(),
        steps,
        chunk_size,
        threshold,
        cp_isolation,
        cp_exclusion,
        preemptive_behaviour,
        explain,
    )


def from_path(
    path: "PathLike[Any]",
    steps: int = 5,
    chunk_size: int = 512,
    threshold: float = 0.20,
    cp_isolation: Optional[List[str]] = None,
    cp_exclusion: Optional[List[str]] = None,
    preemptive_behaviour: bool = True,
    explain: bool = False,
) -> CharsetMatches:
    """
    Same as from_bytes but with one extra step: opening and reading the given file path in binary mode.
    Can raise IOError.
    """
    with open(path, "rb") as fp:
        return from_fp(
            fp,
            steps,
            chunk_size,
            threshold,
            cp_isolation,
            cp_exclusion,
            preemptive_behaviour,
            explain,
        )


def normalize(
    path: "PathLike[Any]",
    steps: int = 5,
    chunk_size: int = 512,
    threshold: float = 0.20,
    cp_isolation: Optional[List[str]] = None,
    cp_exclusion: Optional[List[str]] = None,
    preemptive_behaviour: bool = True,
) -> CharsetMatch:
    """
    Take a (text-based) file path and try to create another file next to it, this time using UTF-8.
    """
    warnings.warn(
        "normalize is deprecated and will be removed in 3.0",
        DeprecationWarning,
    )

    results = from_path(
        path,
        steps,
        chunk_size,
        threshold,
        cp_isolation,
        cp_exclusion,
        preemptive_behaviour,
    )

    filename = basename(path)
    target_extensions = list(splitext(filename))

    if len(results) == 0:
        raise IOError(
            'Unable to normalize "{}", no encoding charset seems to fit.'.format(
                filename
            )
        )

    result = results.best()

    target_extensions[0] += "-" + result.encoding  # type: ignore

    with open(
        "{}".format(str(path).replace(filename, "".join(target_extensions))), "wb"
    ) as fp:
        fp.write(result.output())  # type: ignore

    return result  # type: ignore
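A short usage sketch of `from_bytes` with the knobs documented above (sample text and parameter values are illustrative):

```python
from charset_normalizer import from_bytes

payload = "Договір підписано.".encode("cp1251")

# Probe with smaller chunks and a stricter mess budget than the defaults
# (steps=5, chunk_size=512, threshold=0.2).
matches = from_bytes(payload, steps=3, chunk_size=256, threshold=0.15)

best = matches.best()
if best is not None:
    print(best.encoding)  # a fitting single-byte table, e.g. cp1251
    print(str(best))      # the decoded text
```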
File diff suppressed because it is too large
Binary file not shown.
@ -0,0 +1,339 @@
import importlib
from codecs import IncrementalDecoder
from collections import Counter
from functools import lru_cache
from typing import Counter as TypeCounter, Dict, List, Optional, Tuple

from .assets import FREQUENCIES
from .constant import KO_NAMES, LANGUAGE_SUPPORTED_COUNT, TOO_SMALL_SEQUENCE, ZH_NAMES
from .md import is_suspiciously_successive_range
from .models import CoherenceMatches
from .utils import (
    is_accentuated,
    is_latin,
    is_multi_byte_encoding,
    is_unicode_range_secondary,
    unicode_range,
)


def encoding_unicode_range(iana_name: str) -> List[str]:
    """
    Return the associated unicode ranges in a single-byte code page.
    """
    if is_multi_byte_encoding(iana_name):
        raise IOError("Function not supported on multi-byte code page")

    decoder = importlib.import_module(
        "encodings.{}".format(iana_name)
    ).IncrementalDecoder

    p: IncrementalDecoder = decoder(errors="ignore")
    seen_ranges: Dict[str, int] = {}
    character_count: int = 0

    for i in range(0x40, 0xFF):
        chunk: str = p.decode(bytes([i]))

        if chunk:
            character_range: Optional[str] = unicode_range(chunk)

            if character_range is None:
                continue

            if is_unicode_range_secondary(character_range) is False:
                if character_range not in seen_ranges:
                    seen_ranges[character_range] = 0
                seen_ranges[character_range] += 1
                character_count += 1

    return sorted(
        [
            character_range
            for character_range in seen_ranges
            if seen_ranges[character_range] / character_count >= 0.15
        ]
    )


def unicode_range_languages(primary_range: str) -> List[str]:
    """
    Return the inferred languages used with a unicode range.
    """
    languages: List[str] = []

    for language, characters in FREQUENCIES.items():
        for character in characters:
            if unicode_range(character) == primary_range:
                languages.append(language)
                break

    return languages


@lru_cache()
def encoding_languages(iana_name: str) -> List[str]:
    """
    Single-byte encoding language association. Some code pages are heavily linked to particular language(s).
    This function performs the correspondence.
    """
    unicode_ranges: List[str] = encoding_unicode_range(iana_name)
    primary_range: Optional[str] = None

    for specified_range in unicode_ranges:
        if "Latin" not in specified_range:
            primary_range = specified_range
            break

    if primary_range is None:
        return ["Latin Based"]

    return unicode_range_languages(primary_range)


@lru_cache()
def mb_encoding_languages(iana_name: str) -> List[str]:
    """
    Multi-byte encoding language association. Some code pages are heavily linked to particular language(s).
    This function performs the correspondence.
    """
    if (
        iana_name.startswith("shift_")
        or iana_name.startswith("iso2022_jp")
        or iana_name.startswith("euc_j")
        or iana_name == "cp932"
    ):
        return ["Japanese"]
    if iana_name.startswith("gb") or iana_name in ZH_NAMES:
        return ["Chinese", "Classical Chinese"]
    if iana_name.startswith("iso2022_kr") or iana_name in KO_NAMES:
        return ["Korean"]

    return []


@lru_cache(maxsize=LANGUAGE_SUPPORTED_COUNT)
def get_target_features(language: str) -> Tuple[bool, bool]:
    """
    Determine the main aspects of a supported language: whether it contains accents and whether it is pure Latin.
    """
    target_have_accents: bool = False
    target_pure_latin: bool = True

    for character in FREQUENCIES[language]:
        if not target_have_accents and is_accentuated(character):
            target_have_accents = True
        if target_pure_latin and is_latin(character) is False:
            target_pure_latin = False

    return target_have_accents, target_pure_latin


def alphabet_languages(
    characters: List[str], ignore_non_latin: bool = False
) -> List[str]:
    """
    Return the languages associated with the given characters.
    """
    languages: List[Tuple[str, float]] = []

    source_have_accents = any(is_accentuated(character) for character in characters)

    for language, language_characters in FREQUENCIES.items():

        target_have_accents, target_pure_latin = get_target_features(language)

        if ignore_non_latin and target_pure_latin is False:
            continue

        if target_have_accents is False and source_have_accents:
            continue

        character_count: int = len(language_characters)

        character_match_count: int = len(
            [c for c in language_characters if c in characters]
        )

        ratio: float = character_match_count / character_count

        if ratio >= 0.2:
            languages.append((language, ratio))

    languages = sorted(languages, key=lambda x: x[1], reverse=True)

    return [compatible_language[0] for compatible_language in languages]


def characters_popularity_compare(
    language: str, ordered_characters: List[str]
) -> float:
    """
    Determine whether an ordered character list (by occurrence, from most frequent to rarest) matches a particular language.
    The result is a ratio between 0. (absolutely no correspondence) and 1. (near perfect fit).
    Beware that this function is not strict on the match, in order to ease detection. (Meaning a close match is 1.)
    """
    if language not in FREQUENCIES:
        raise ValueError("{} not available".format(language))

    character_approved_count: int = 0
    FREQUENCIES_language_set = set(FREQUENCIES[language])

    for character in ordered_characters:
        if character not in FREQUENCIES_language_set:
            continue

        characters_before_source: List[str] = FREQUENCIES[language][
            0 : FREQUENCIES[language].index(character)
        ]
        characters_after_source: List[str] = FREQUENCIES[language][
            FREQUENCIES[language].index(character) :
        ]
        characters_before: List[str] = ordered_characters[
            0 : ordered_characters.index(character)
        ]
        characters_after: List[str] = ordered_characters[
            ordered_characters.index(character) :
        ]

        before_match_count: int = len(
            set(characters_before) & set(characters_before_source)
        )

        after_match_count: int = len(
            set(characters_after) & set(characters_after_source)
        )

        if len(characters_before_source) == 0 and before_match_count <= 4:
            character_approved_count += 1
            continue

        if len(characters_after_source) == 0 and after_match_count <= 4:
            character_approved_count += 1
            continue

        if (
            before_match_count / len(characters_before_source) >= 0.4
            or after_match_count / len(characters_after_source) >= 0.4
        ):
            character_approved_count += 1
            continue

    return character_approved_count / len(ordered_characters)


def alpha_unicode_split(decoded_sequence: str) -> List[str]:
    """
    Given a decoded text sequence, return a list of str. Unicode range / alphabet separation.
    Ex. a text containing English/Latin with a bit of Hebrew will return two items in the resulting list;
    one containing the Latin letters and the other the Hebrew.
    """
    layers: Dict[str, str] = {}

    for character in decoded_sequence:
        if character.isalpha() is False:
            continue

        character_range: Optional[str] = unicode_range(character)

        if character_range is None:
            continue

        layer_target_range: Optional[str] = None

        for discovered_range in layers:
            if (
                is_suspiciously_successive_range(discovered_range, character_range)
                is False
            ):
                layer_target_range = discovered_range
                break

        if layer_target_range is None:
            layer_target_range = character_range

        if layer_target_range not in layers:
            layers[layer_target_range] = character.lower()
            continue

        layers[layer_target_range] += character.lower()

    return list(layers.values())


def merge_coherence_ratios(results: List[CoherenceMatches]) -> CoherenceMatches:
    """
    This function merges results previously given by the function coherence_ratio.
    The return type is the same as coherence_ratio.
    """
    per_language_ratios: Dict[str, List[float]] = {}
    for result in results:
        for sub_result in result:
            language, ratio = sub_result
            if language not in per_language_ratios:
                per_language_ratios[language] = [ratio]
                continue
            per_language_ratios[language].append(ratio)

    merge = [
        (
            language,
            round(
                sum(per_language_ratios[language]) / len(per_language_ratios[language]),
                4,
            ),
        )
        for language in per_language_ratios
    ]

    return sorted(merge, key=lambda x: x[1], reverse=True)


@lru_cache(maxsize=2048)
def coherence_ratio(
    decoded_sequence: str, threshold: float = 0.1, lg_inclusion: Optional[str] = None
) -> CoherenceMatches:
    """
    Detect ANY language that can be identified in the given sequence. The sequence will be analysed by layers.
    A layer = character extraction by alphabets/ranges.
    """

    results: List[Tuple[str, float]] = []
    ignore_non_latin: bool = False

    sufficient_match_count: int = 0

    lg_inclusion_list = lg_inclusion.split(",") if lg_inclusion is not None else []
    if "Latin Based" in lg_inclusion_list:
        ignore_non_latin = True
        lg_inclusion_list.remove("Latin Based")

    for layer in alpha_unicode_split(decoded_sequence):
        sequence_frequencies: TypeCounter[str] = Counter(layer)
        most_common = sequence_frequencies.most_common()

        character_count: int = sum(o for c, o in most_common)

        if character_count <= TOO_SMALL_SEQUENCE:
            continue

        popular_character_ordered: List[str] = [c for c, o in most_common]

        for language in lg_inclusion_list or alphabet_languages(
            popular_character_ordered, ignore_non_latin
        ):
            ratio: float = characters_popularity_compare(
                language, popular_character_ordered
            )

            if ratio < threshold:
                continue
            elif ratio >= 0.8:
                sufficient_match_count += 1

            results.append((language, round(ratio, 4)))

            if sufficient_match_count >= 3:
                break

    return sorted(results, key=lambda x: x[1], reverse=True)
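To make the "layer" idea in `alpha_unicode_split` concrete, here is a small self-contained sketch of splitting text into per-script layers with `unicodedata`; it is a rough stand-in only, since the library relies on its own Unicode-range tables instead.

```python
import unicodedata
from collections import defaultdict

def split_by_script(text: str) -> list:
    # Group alphabetic characters by the first word of their Unicode name
    # ("LATIN", "HEBREW", ...) as a crude alphabet separator.
    layers = defaultdict(str)
    for ch in text:
        if not ch.isalpha():
            continue
        layers[unicodedata.name(ch, "UNKNOWN").split()[0]] += ch.lower()
    return list(layers.values())

print(split_by_script("Hello שלום world"))  # ['helloworld', 'שלום']
```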
Binary file not shown.
Binary file not shown.
@ -0,0 +1,295 @@
import argparse
import sys
from json import dumps
from os.path import abspath
from platform import python_version
from typing import List, Optional

try:
    from unicodedata2 import unidata_version
except ImportError:
    from unicodedata import unidata_version

from charset_normalizer import from_fp
from charset_normalizer.models import CliDetectionResult
from charset_normalizer.version import __version__


def query_yes_no(question: str, default: str = "yes") -> bool:
    """Ask a yes/no question via input() and return their answer.

    "question" is a string that is presented to the user.
    "default" is the presumed answer if the user just hits <Enter>.
    It must be "yes" (the default), "no" or None (meaning
    an answer is required of the user).

    The "answer" return value is True for "yes" or False for "no".

    Credit goes to (c) https://stackoverflow.com/questions/3041986/apt-command-line-interface-like-yes-no-input
    """
    valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False}
    if default is None:
        prompt = " [y/n] "
    elif default == "yes":
        prompt = " [Y/n] "
    elif default == "no":
        prompt = " [y/N] "
    else:
        raise ValueError("invalid default answer: '%s'" % default)

    while True:
        sys.stdout.write(question + prompt)
        choice = input().lower()
        if default is not None and choice == "":
            return valid[default]
        elif choice in valid:
            return valid[choice]
        else:
            sys.stdout.write("Please respond with 'yes' or 'no' " "(or 'y' or 'n').\n")


def cli_detect(argv: Optional[List[str]] = None) -> int:
    """
    CLI assistant using ARGV and ArgumentParser
    :param argv:
    :return: 0 if everything is fine, anything else signals trouble
    """
    parser = argparse.ArgumentParser(
        description="The Real First Universal Charset Detector. "
        "Discover originating encoding used on text file. "
        "Normalize text to unicode."
    )

    parser.add_argument(
        "files", type=argparse.FileType("rb"), nargs="+", help="File(s) to be analysed"
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        default=False,
        dest="verbose",
        help="Display complementary information about file if any. "
        "Stdout will contain logs about the detection process.",
    )
    parser.add_argument(
        "-a",
        "--with-alternative",
        action="store_true",
        default=False,
        dest="alternatives",
        help="Output complementary possibilities if any. Top-level JSON WILL be a list.",
    )
    parser.add_argument(
        "-n",
        "--normalize",
        action="store_true",
        default=False,
        dest="normalize",
        help="Permit to normalize input file. If not set, program does not write anything.",
    )
    parser.add_argument(
        "-m",
        "--minimal",
        action="store_true",
        default=False,
        dest="minimal",
        help="Only output the charset detected to STDOUT. Disabling JSON output.",
    )
    parser.add_argument(
        "-r",
        "--replace",
        action="store_true",
        default=False,
        dest="replace",
        help="Replace file when trying to normalize it instead of creating a new one.",
    )
    parser.add_argument(
        "-f",
        "--force",
        action="store_true",
        default=False,
        dest="force",
        help="Replace file without asking if you are sure, use this flag with caution.",
    )
    parser.add_argument(
        "-t",
        "--threshold",
        action="store",
        default=0.2,
        type=float,
        dest="threshold",
        help="Define a custom maximum amount of chaos allowed in decoded content. 0. <= chaos <= 1.",
    )
    parser.add_argument(
        "--version",
        action="version",
        version="Charset-Normalizer {} - Python {} - Unicode {}".format(
            __version__, python_version(), unidata_version
        ),
        help="Show version information and exit.",
    )

    args = parser.parse_args(argv)

    if args.replace is True and args.normalize is False:
        print("Use --replace in addition of --normalize only.", file=sys.stderr)
        return 1

    if args.force is True and args.replace is False:
        print("Use --force in addition of --replace only.", file=sys.stderr)
        return 1

    if args.threshold < 0.0 or args.threshold > 1.0:
        print("--threshold VALUE should be between 0. AND 1.", file=sys.stderr)
        return 1

    x_ = []

    for my_file in args.files:

        matches = from_fp(my_file, threshold=args.threshold, explain=args.verbose)

        best_guess = matches.best()

        if best_guess is None:
            print(
                'Unable to identify originating encoding for "{}". {}'.format(
                    my_file.name,
                    "Maybe try increasing maximum amount of chaos."
                    if args.threshold < 1.0
                    else "",
                ),
                file=sys.stderr,
            )
            x_.append(
                CliDetectionResult(
                    abspath(my_file.name),
                    None,
                    [],
                    [],
                    "Unknown",
                    [],
                    False,
                    1.0,
                    0.0,
                    None,
                    True,
                )
            )
        else:
            x_.append(
                CliDetectionResult(
                    abspath(my_file.name),
                    best_guess.encoding,
                    best_guess.encoding_aliases,
                    [
                        cp
                        for cp in best_guess.could_be_from_charset
                        if cp != best_guess.encoding
                    ],
                    best_guess.language,
                    best_guess.alphabets,
                    best_guess.bom,
                    best_guess.percent_chaos,
                    best_guess.percent_coherence,
                    None,
                    True,
                )
            )

            if len(matches) > 1 and args.alternatives:
                for el in matches:
                    if el != best_guess:
                        x_.append(
                            CliDetectionResult(
                                abspath(my_file.name),
                                el.encoding,
                                el.encoding_aliases,
                                [
                                    cp
                                    for cp in el.could_be_from_charset
                                    if cp != el.encoding
                                ],
                                el.language,
                                el.alphabets,
                                el.bom,
                                el.percent_chaos,
                                el.percent_coherence,
                                None,
                                False,
                            )
                        )

            if args.normalize is True:

                if best_guess.encoding.startswith("utf") is True:
                    print(
                        '"{}" file does not need to be normalized, as it already came from unicode.'.format(
                            my_file.name
                        ),
                        file=sys.stderr,
                    )
                    if my_file.closed is False:
                        my_file.close()
                    continue

                o_: List[str] = my_file.name.split(".")

                if args.replace is False:
                    o_.insert(-1, best_guess.encoding)
                    if my_file.closed is False:
                        my_file.close()
                elif (
                    args.force is False
                    and query_yes_no(
                        'Are you sure to normalize "{}" by replacing it ?'.format(
                            my_file.name
                        ),
                        "no",
                    )
                    is False
                ):
                    if my_file.closed is False:
                        my_file.close()
                    continue

                try:
                    x_[0].unicode_path = abspath("./{}".format(".".join(o_)))

                    with open(x_[0].unicode_path, "w", encoding="utf-8") as fp:
                        fp.write(str(best_guess))
                except IOError as e:
                    print(str(e), file=sys.stderr)
                    if my_file.closed is False:
                        my_file.close()
                    return 2

        if my_file.closed is False:
            my_file.close()

    if args.minimal is False:
        print(
            dumps(
                [el.__dict__ for el in x_] if len(x_) > 1 else x_[0].__dict__,
                ensure_ascii=True,
                indent=4,
            )
        )
    else:
        for my_file in args.files:
            print(
                ", ".join(
                    [
                        el.encoding or "undefined"
                        for el in x_
                        if el.path == abspath(my_file.name)
                    ]
                )
            )

    return 0


if __name__ == "__main__":
    cli_detect()
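A minimal usage sketch of the entry point above; the file name is hypothetical, and the `normalizer` console-script name is how the package is commonly exposed, not something shown in this diff:

# From a shell:
#   normalizer my_document.txt --with-alternative
# Or programmatically, passing argv explicitly:
#   exit_code = cli_detect(["my_document.txt", "--with-alternative", "--threshold", "0.3"])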
@ -0,0 +1,497 @@
from codecs import BOM_UTF8, BOM_UTF16_BE, BOM_UTF16_LE, BOM_UTF32_BE, BOM_UTF32_LE
from encodings.aliases import aliases
from re import IGNORECASE, compile as re_compile
from typing import Dict, List, Set, Union

from .assets import FREQUENCIES

# Contains, for each eligible encoding, its SIG/BOM mark(s) as a single bytes item or a list of bytes
ENCODING_MARKS: Dict[str, Union[bytes, List[bytes]]] = {
    "utf_8": BOM_UTF8,
    "utf_7": [
        b"\x2b\x2f\x76\x38",
        b"\x2b\x2f\x76\x39",
        b"\x2b\x2f\x76\x2b",
        b"\x2b\x2f\x76\x2f",
        b"\x2b\x2f\x76\x38\x2d",
    ],
    "gb18030": b"\x84\x31\x95\x33",
    "utf_32": [BOM_UTF32_BE, BOM_UTF32_LE],
    "utf_16": [BOM_UTF16_BE, BOM_UTF16_LE],
}

TOO_SMALL_SEQUENCE: int = 32
TOO_BIG_SEQUENCE: int = int(10e6)

UTF8_MAXIMAL_ALLOCATION: int = 1112064

UNICODE_RANGES_COMBINED: Dict[str, range] = {
    "Control character": range(31 + 1),
    "Basic Latin": range(32, 127 + 1),
    "Latin-1 Supplement": range(128, 255 + 1),
    "Latin Extended-A": range(256, 383 + 1),
    "Latin Extended-B": range(384, 591 + 1),
    "IPA Extensions": range(592, 687 + 1),
    "Spacing Modifier Letters": range(688, 767 + 1),
    "Combining Diacritical Marks": range(768, 879 + 1),
    "Greek and Coptic": range(880, 1023 + 1),
    "Cyrillic": range(1024, 1279 + 1),
    "Cyrillic Supplement": range(1280, 1327 + 1),
    "Armenian": range(1328, 1423 + 1),
    "Hebrew": range(1424, 1535 + 1),
    "Arabic": range(1536, 1791 + 1),
    "Syriac": range(1792, 1871 + 1),
    "Arabic Supplement": range(1872, 1919 + 1),
    "Thaana": range(1920, 1983 + 1),
    "NKo": range(1984, 2047 + 1),
    "Samaritan": range(2048, 2111 + 1),
    "Mandaic": range(2112, 2143 + 1),
    "Syriac Supplement": range(2144, 2159 + 1),
    "Arabic Extended-A": range(2208, 2303 + 1),
    "Devanagari": range(2304, 2431 + 1),
    "Bengali": range(2432, 2559 + 1),
    "Gurmukhi": range(2560, 2687 + 1),
    "Gujarati": range(2688, 2815 + 1),
    "Oriya": range(2816, 2943 + 1),
    "Tamil": range(2944, 3071 + 1),
    "Telugu": range(3072, 3199 + 1),
    "Kannada": range(3200, 3327 + 1),
    "Malayalam": range(3328, 3455 + 1),
    "Sinhala": range(3456, 3583 + 1),
    "Thai": range(3584, 3711 + 1),
    "Lao": range(3712, 3839 + 1),
    "Tibetan": range(3840, 4095 + 1),
    "Myanmar": range(4096, 4255 + 1),
    "Georgian": range(4256, 4351 + 1),
    "Hangul Jamo": range(4352, 4607 + 1),
    "Ethiopic": range(4608, 4991 + 1),
    "Ethiopic Supplement": range(4992, 5023 + 1),
    "Cherokee": range(5024, 5119 + 1),
    "Unified Canadian Aboriginal Syllabics": range(5120, 5759 + 1),
    "Ogham": range(5760, 5791 + 1),
    "Runic": range(5792, 5887 + 1),
    "Tagalog": range(5888, 5919 + 1),
    "Hanunoo": range(5920, 5951 + 1),
    "Buhid": range(5952, 5983 + 1),
    "Tagbanwa": range(5984, 6015 + 1),
    "Khmer": range(6016, 6143 + 1),
    "Mongolian": range(6144, 6319 + 1),
    "Unified Canadian Aboriginal Syllabics Extended": range(6320, 6399 + 1),
    "Limbu": range(6400, 6479 + 1),
    "Tai Le": range(6480, 6527 + 1),
    "New Tai Lue": range(6528, 6623 + 1),
    "Khmer Symbols": range(6624, 6655 + 1),
    "Buginese": range(6656, 6687 + 1),
    "Tai Tham": range(6688, 6831 + 1),
    "Combining Diacritical Marks Extended": range(6832, 6911 + 1),
    "Balinese": range(6912, 7039 + 1),
    "Sundanese": range(7040, 7103 + 1),
    "Batak": range(7104, 7167 + 1),
    "Lepcha": range(7168, 7247 + 1),
    "Ol Chiki": range(7248, 7295 + 1),
    "Cyrillic Extended C": range(7296, 7311 + 1),
    "Sundanese Supplement": range(7360, 7375 + 1),
    "Vedic Extensions": range(7376, 7423 + 1),
    "Phonetic Extensions": range(7424, 7551 + 1),
    "Phonetic Extensions Supplement": range(7552, 7615 + 1),
    "Combining Diacritical Marks Supplement": range(7616, 7679 + 1),
    "Latin Extended Additional": range(7680, 7935 + 1),
    "Greek Extended": range(7936, 8191 + 1),
    "General Punctuation": range(8192, 8303 + 1),
    "Superscripts and Subscripts": range(8304, 8351 + 1),
    "Currency Symbols": range(8352, 8399 + 1),
    "Combining Diacritical Marks for Symbols": range(8400, 8447 + 1),
    "Letterlike Symbols": range(8448, 8527 + 1),
    "Number Forms": range(8528, 8591 + 1),
    "Arrows": range(8592, 8703 + 1),
    "Mathematical Operators": range(8704, 8959 + 1),
    "Miscellaneous Technical": range(8960, 9215 + 1),
    "Control Pictures": range(9216, 9279 + 1),
    "Optical Character Recognition": range(9280, 9311 + 1),
    "Enclosed Alphanumerics": range(9312, 9471 + 1),
    "Box Drawing": range(9472, 9599 + 1),
    "Block Elements": range(9600, 9631 + 1),
    "Geometric Shapes": range(9632, 9727 + 1),
    "Miscellaneous Symbols": range(9728, 9983 + 1),
    "Dingbats": range(9984, 10175 + 1),
    "Miscellaneous Mathematical Symbols-A": range(10176, 10223 + 1),
    "Supplemental Arrows-A": range(10224, 10239 + 1),
    "Braille Patterns": range(10240, 10495 + 1),
    "Supplemental Arrows-B": range(10496, 10623 + 1),
    "Miscellaneous Mathematical Symbols-B": range(10624, 10751 + 1),
    "Supplemental Mathematical Operators": range(10752, 11007 + 1),
    "Miscellaneous Symbols and Arrows": range(11008, 11263 + 1),
    "Glagolitic": range(11264, 11359 + 1),
    "Latin Extended-C": range(11360, 11391 + 1),
    "Coptic": range(11392, 11519 + 1),
    "Georgian Supplement": range(11520, 11567 + 1),
    "Tifinagh": range(11568, 11647 + 1),
    "Ethiopic Extended": range(11648, 11743 + 1),
    "Cyrillic Extended-A": range(11744, 11775 + 1),
    "Supplemental Punctuation": range(11776, 11903 + 1),
    "CJK Radicals Supplement": range(11904, 12031 + 1),
    "Kangxi Radicals": range(12032, 12255 + 1),
    "Ideographic Description Characters": range(12272, 12287 + 1),
    "CJK Symbols and Punctuation": range(12288, 12351 + 1),
    "Hiragana": range(12352, 12447 + 1),
    "Katakana": range(12448, 12543 + 1),
    "Bopomofo": range(12544, 12591 + 1),
    "Hangul Compatibility Jamo": range(12592, 12687 + 1),
    "Kanbun": range(12688, 12703 + 1),
    "Bopomofo Extended": range(12704, 12735 + 1),
    "CJK Strokes": range(12736, 12783 + 1),
    "Katakana Phonetic Extensions": range(12784, 12799 + 1),
    "Enclosed CJK Letters and Months": range(12800, 13055 + 1),
    "CJK Compatibility": range(13056, 13311 + 1),
    "CJK Unified Ideographs Extension A": range(13312, 19903 + 1),
    "Yijing Hexagram Symbols": range(19904, 19967 + 1),
    "CJK Unified Ideographs": range(19968, 40959 + 1),
    "Yi Syllables": range(40960, 42127 + 1),
    "Yi Radicals": range(42128, 42191 + 1),
    "Lisu": range(42192, 42239 + 1),
    "Vai": range(42240, 42559 + 1),
    "Cyrillic Extended-B": range(42560, 42655 + 1),
    "Bamum": range(42656, 42751 + 1),
    "Modifier Tone Letters": range(42752, 42783 + 1),
    "Latin Extended-D": range(42784, 43007 + 1),
    "Syloti Nagri": range(43008, 43055 + 1),
    "Common Indic Number Forms": range(43056, 43071 + 1),
    "Phags-pa": range(43072, 43135 + 1),
    "Saurashtra": range(43136, 43231 + 1),
    "Devanagari Extended": range(43232, 43263 + 1),
    "Kayah Li": range(43264, 43311 + 1),
    "Rejang": range(43312, 43359 + 1),
    "Hangul Jamo Extended-A": range(43360, 43391 + 1),
    "Javanese": range(43392, 43487 + 1),
    "Myanmar Extended-B": range(43488, 43519 + 1),
    "Cham": range(43520, 43615 + 1),
    "Myanmar Extended-A": range(43616, 43647 + 1),
    "Tai Viet": range(43648, 43743 + 1),
    "Meetei Mayek Extensions": range(43744, 43775 + 1),
    "Ethiopic Extended-A": range(43776, 43823 + 1),
    "Latin Extended-E": range(43824, 43887 + 1),
    "Cherokee Supplement": range(43888, 43967 + 1),
    "Meetei Mayek": range(43968, 44031 + 1),
    "Hangul Syllables": range(44032, 55215 + 1),
    "Hangul Jamo Extended-B": range(55216, 55295 + 1),
    "High Surrogates": range(55296, 56191 + 1),
    "High Private Use Surrogates": range(56192, 56319 + 1),
    "Low Surrogates": range(56320, 57343 + 1),
    "Private Use Area": range(57344, 63743 + 1),
    "CJK Compatibility Ideographs": range(63744, 64255 + 1),
    "Alphabetic Presentation Forms": range(64256, 64335 + 1),
    "Arabic Presentation Forms-A": range(64336, 65023 + 1),
    "Variation Selectors": range(65024, 65039 + 1),
    "Vertical Forms": range(65040, 65055 + 1),
    "Combining Half Marks": range(65056, 65071 + 1),
    "CJK Compatibility Forms": range(65072, 65103 + 1),
    "Small Form Variants": range(65104, 65135 + 1),
    "Arabic Presentation Forms-B": range(65136, 65279 + 1),
    "Halfwidth and Fullwidth Forms": range(65280, 65519 + 1),
    "Specials": range(65520, 65535 + 1),
    "Linear B Syllabary": range(65536, 65663 + 1),
    "Linear B Ideograms": range(65664, 65791 + 1),
    "Aegean Numbers": range(65792, 65855 + 1),
    "Ancient Greek Numbers": range(65856, 65935 + 1),
    "Ancient Symbols": range(65936, 65999 + 1),
    "Phaistos Disc": range(66000, 66047 + 1),
    "Lycian": range(66176, 66207 + 1),
    "Carian": range(66208, 66271 + 1),
    "Coptic Epact Numbers": range(66272, 66303 + 1),
    "Old Italic": range(66304, 66351 + 1),
    "Gothic": range(66352, 66383 + 1),
    "Old Permic": range(66384, 66431 + 1),
    "Ugaritic": range(66432, 66463 + 1),
    "Old Persian": range(66464, 66527 + 1),
    "Deseret": range(66560, 66639 + 1),
    "Shavian": range(66640, 66687 + 1),
    "Osmanya": range(66688, 66735 + 1),
    "Osage": range(66736, 66815 + 1),
    "Elbasan": range(66816, 66863 + 1),
    "Caucasian Albanian": range(66864, 66927 + 1),
    "Linear A": range(67072, 67455 + 1),
    "Cypriot Syllabary": range(67584, 67647 + 1),
    "Imperial Aramaic": range(67648, 67679 + 1),
    "Palmyrene": range(67680, 67711 + 1),
    "Nabataean": range(67712, 67759 + 1),
    "Hatran": range(67808, 67839 + 1),
    "Phoenician": range(67840, 67871 + 1),
    "Lydian": range(67872, 67903 + 1),
    "Meroitic Hieroglyphs": range(67968, 67999 + 1),
    "Meroitic Cursive": range(68000, 68095 + 1),
    "Kharoshthi": range(68096, 68191 + 1),
    "Old South Arabian": range(68192, 68223 + 1),
    "Old North Arabian": range(68224, 68255 + 1),
    "Manichaean": range(68288, 68351 + 1),
    "Avestan": range(68352, 68415 + 1),
    "Inscriptional Parthian": range(68416, 68447 + 1),
    "Inscriptional Pahlavi": range(68448, 68479 + 1),
    "Psalter Pahlavi": range(68480, 68527 + 1),
    "Old Turkic": range(68608, 68687 + 1),
    "Old Hungarian": range(68736, 68863 + 1),
    "Rumi Numeral Symbols": range(69216, 69247 + 1),
    "Brahmi": range(69632, 69759 + 1),
    "Kaithi": range(69760, 69839 + 1),
    "Sora Sompeng": range(69840, 69887 + 1),
    "Chakma": range(69888, 69967 + 1),
    "Mahajani": range(69968, 70015 + 1),
    "Sharada": range(70016, 70111 + 1),
    "Sinhala Archaic Numbers": range(70112, 70143 + 1),
    "Khojki": range(70144, 70223 + 1),
    "Multani": range(70272, 70319 + 1),
    "Khudawadi": range(70320, 70399 + 1),
    "Grantha": range(70400, 70527 + 1),
    "Newa": range(70656, 70783 + 1),
    "Tirhuta": range(70784, 70879 + 1),
    "Siddham": range(71040, 71167 + 1),
    "Modi": range(71168, 71263 + 1),
    "Mongolian Supplement": range(71264, 71295 + 1),
    "Takri": range(71296, 71375 + 1),
    "Ahom": range(71424, 71487 + 1),
    "Warang Citi": range(71840, 71935 + 1),
    "Zanabazar Square": range(72192, 72271 + 1),
    "Soyombo": range(72272, 72367 + 1),
    "Pau Cin Hau": range(72384, 72447 + 1),
    "Bhaiksuki": range(72704, 72815 + 1),
    "Marchen": range(72816, 72895 + 1),
    "Masaram Gondi": range(72960, 73055 + 1),
    "Cuneiform": range(73728, 74751 + 1),
    "Cuneiform Numbers and Punctuation": range(74752, 74879 + 1),
    "Early Dynastic Cuneiform": range(74880, 75087 + 1),
    "Egyptian Hieroglyphs": range(77824, 78895 + 1),
    "Anatolian Hieroglyphs": range(82944, 83583 + 1),
    "Bamum Supplement": range(92160, 92735 + 1),
    "Mro": range(92736, 92783 + 1),
    "Bassa Vah": range(92880, 92927 + 1),
    "Pahawh Hmong": range(92928, 93071 + 1),
    "Miao": range(93952, 94111 + 1),
    "Ideographic Symbols and Punctuation": range(94176, 94207 + 1),
    "Tangut": range(94208, 100351 + 1),
    "Tangut Components": range(100352, 101119 + 1),
    "Kana Supplement": range(110592, 110847 + 1),
    "Kana Extended-A": range(110848, 110895 + 1),
    "Nushu": range(110960, 111359 + 1),
    "Duployan": range(113664, 113823 + 1),
    "Shorthand Format Controls": range(113824, 113839 + 1),
    "Byzantine Musical Symbols": range(118784, 119039 + 1),
    "Musical Symbols": range(119040, 119295 + 1),
    "Ancient Greek Musical Notation": range(119296, 119375 + 1),
    "Tai Xuan Jing Symbols": range(119552, 119647 + 1),
    "Counting Rod Numerals": range(119648, 119679 + 1),
    "Mathematical Alphanumeric Symbols": range(119808, 120831 + 1),
    "Sutton SignWriting": range(120832, 121519 + 1),
    "Glagolitic Supplement": range(122880, 122927 + 1),
    "Mende Kikakui": range(124928, 125151 + 1),
    "Adlam": range(125184, 125279 + 1),
    "Arabic Mathematical Alphabetic Symbols": range(126464, 126719 + 1),
    "Mahjong Tiles": range(126976, 127023 + 1),
    "Domino Tiles": range(127024, 127135 + 1),
    "Playing Cards": range(127136, 127231 + 1),
    "Enclosed Alphanumeric Supplement": range(127232, 127487 + 1),
    "Enclosed Ideographic Supplement": range(127488, 127743 + 1),
    "Miscellaneous Symbols and Pictographs": range(127744, 128511 + 1),
    "Emoticons range(Emoji)": range(128512, 128591 + 1),
    "Ornamental Dingbats": range(128592, 128639 + 1),
    "Transport and Map Symbols": range(128640, 128767 + 1),
    "Alchemical Symbols": range(128768, 128895 + 1),
    "Geometric Shapes Extended": range(128896, 129023 + 1),
    "Supplemental Arrows-C": range(129024, 129279 + 1),
    "Supplemental Symbols and Pictographs": range(129280, 129535 + 1),
    "CJK Unified Ideographs Extension B": range(131072, 173791 + 1),
    "CJK Unified Ideographs Extension C": range(173824, 177983 + 1),
    "CJK Unified Ideographs Extension D": range(177984, 178207 + 1),
    "CJK Unified Ideographs Extension E": range(178208, 183983 + 1),
    "CJK Unified Ideographs Extension F": range(183984, 191471 + 1),
    "CJK Compatibility Ideographs Supplement": range(194560, 195103 + 1),
    "Tags": range(917504, 917631 + 1),
    "Variation Selectors Supplement": range(917760, 917999 + 1),
}


UNICODE_SECONDARY_RANGE_KEYWORD: List[str] = [
    "Supplement",
    "Extended",
    "Extensions",
    "Modifier",
    "Marks",
    "Punctuation",
    "Symbols",
    "Forms",
    "Operators",
    "Miscellaneous",
    "Drawing",
    "Block",
    "Shapes",
    "Supplemental",
    "Tags",
]

RE_POSSIBLE_ENCODING_INDICATION = re_compile(
    r"(?:(?:encoding)|(?:charset)|(?:coding))(?:[\:= ]{1,10})(?:[\"\']?)([a-zA-Z0-9\-_]+)(?:[\"\']?)",
    IGNORECASE,
)

IANA_SUPPORTED: List[str] = sorted(
    filter(
        lambda x: x.endswith("_codec") is False
        and x not in {"rot_13", "tactis", "mbcs"},
        list(set(aliases.values())),
    )
)

IANA_SUPPORTED_COUNT: int = len(IANA_SUPPORTED)

# Pre-computed code pages that are similar, using the function cp_similarity.
IANA_SUPPORTED_SIMILAR: Dict[str, List[str]] = {
    "cp037": ["cp1026", "cp1140", "cp273", "cp500"],
    "cp1026": ["cp037", "cp1140", "cp273", "cp500"],
    "cp1125": ["cp866"],
    "cp1140": ["cp037", "cp1026", "cp273", "cp500"],
    "cp1250": ["iso8859_2"],
    "cp1251": ["kz1048", "ptcp154"],
    "cp1252": ["iso8859_15", "iso8859_9", "latin_1"],
    "cp1253": ["iso8859_7"],
    "cp1254": ["iso8859_15", "iso8859_9", "latin_1"],
    "cp1257": ["iso8859_13"],
    "cp273": ["cp037", "cp1026", "cp1140", "cp500"],
    "cp437": ["cp850", "cp858", "cp860", "cp861", "cp862", "cp863", "cp865"],
    "cp500": ["cp037", "cp1026", "cp1140", "cp273"],
    "cp850": ["cp437", "cp857", "cp858", "cp865"],
    "cp857": ["cp850", "cp858", "cp865"],
    "cp858": ["cp437", "cp850", "cp857", "cp865"],
    "cp860": ["cp437", "cp861", "cp862", "cp863", "cp865"],
    "cp861": ["cp437", "cp860", "cp862", "cp863", "cp865"],
    "cp862": ["cp437", "cp860", "cp861", "cp863", "cp865"],
    "cp863": ["cp437", "cp860", "cp861", "cp862", "cp865"],
    "cp865": ["cp437", "cp850", "cp857", "cp858", "cp860", "cp861", "cp862", "cp863"],
    "cp866": ["cp1125"],
    "iso8859_10": ["iso8859_14", "iso8859_15", "iso8859_4", "iso8859_9", "latin_1"],
    "iso8859_11": ["tis_620"],
    "iso8859_13": ["cp1257"],
    "iso8859_14": [
        "iso8859_10",
        "iso8859_15",
        "iso8859_16",
        "iso8859_3",
        "iso8859_9",
        "latin_1",
    ],
    "iso8859_15": [
        "cp1252",
        "cp1254",
        "iso8859_10",
        "iso8859_14",
        "iso8859_16",
        "iso8859_3",
        "iso8859_9",
        "latin_1",
    ],
    "iso8859_16": [
        "iso8859_14",
        "iso8859_15",
        "iso8859_2",
        "iso8859_3",
        "iso8859_9",
        "latin_1",
    ],
    "iso8859_2": ["cp1250", "iso8859_16", "iso8859_4"],
    "iso8859_3": ["iso8859_14", "iso8859_15", "iso8859_16", "iso8859_9", "latin_1"],
    "iso8859_4": ["iso8859_10", "iso8859_2", "iso8859_9", "latin_1"],
    "iso8859_7": ["cp1253"],
    "iso8859_9": [
        "cp1252",
        "cp1254",
        "cp1258",
        "iso8859_10",
        "iso8859_14",
        "iso8859_15",
        "iso8859_16",
        "iso8859_3",
        "iso8859_4",
        "latin_1",
    ],
    "kz1048": ["cp1251", "ptcp154"],
    "latin_1": [
        "cp1252",
        "cp1254",
        "cp1258",
        "iso8859_10",
        "iso8859_14",
        "iso8859_15",
        "iso8859_16",
        "iso8859_3",
        "iso8859_4",
        "iso8859_9",
    ],
    "mac_iceland": ["mac_roman", "mac_turkish"],
    "mac_roman": ["mac_iceland", "mac_turkish"],
    "mac_turkish": ["mac_iceland", "mac_roman"],
    "ptcp154": ["cp1251", "kz1048"],
    "tis_620": ["iso8859_11"],
}


CHARDET_CORRESPONDENCE: Dict[str, str] = {
    "iso2022_kr": "ISO-2022-KR",
    "iso2022_jp": "ISO-2022-JP",
    "euc_kr": "EUC-KR",
    "tis_620": "TIS-620",
    "utf_32": "UTF-32",
    "euc_jp": "EUC-JP",
    "koi8_r": "KOI8-R",
    "iso8859_1": "ISO-8859-1",
    "iso8859_2": "ISO-8859-2",
    "iso8859_5": "ISO-8859-5",
    "iso8859_6": "ISO-8859-6",
    "iso8859_7": "ISO-8859-7",
    "iso8859_8": "ISO-8859-8",
    "utf_16": "UTF-16",
    "cp855": "IBM855",
    "mac_cyrillic": "MacCyrillic",
    "gb2312": "GB2312",
    "gb18030": "GB18030",
    "cp932": "CP932",
    "cp866": "IBM866",
    "utf_8": "utf-8",
    "utf_8_sig": "UTF-8-SIG",
    "shift_jis": "SHIFT_JIS",
    "big5": "Big5",
    "cp1250": "windows-1250",
    "cp1251": "windows-1251",
    "cp1252": "Windows-1252",
    "cp1253": "windows-1253",
    "cp1255": "windows-1255",
    "cp1256": "windows-1256",
    "cp1254": "Windows-1254",
    "cp949": "CP949",
}


COMMON_SAFE_ASCII_CHARACTERS: Set[str] = {
    "<",
    ">",
    "=",
    ":",
    "/",
    "&",
    ";",
    "{",
    "}",
    "[",
    "]",
    ",",
    "|",
    '"',
    "-",
}


KO_NAMES: Set[str] = {"johab", "cp949", "euc_kr"}
ZH_NAMES: Set[str] = {"big5", "cp950", "big5hkscs", "hz"}

NOT_PRINTABLE_PATTERN = re_compile(r"[0-9\W\n\r\t]+")

LANGUAGE_SUPPORTED_COUNT: int = len(FREQUENCIES)

# Logging LEVEL below DEBUG
TRACE: int = 5
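A short sketch of how a table like UNICODE_RANGES_COMBINED can be queried; the helper name below is illustrative only (the library keeps its own variant in utils):

def unicode_range_of(code_point: int) -> str:
    # range() membership is O(1), so a linear scan over ~280 entries is cheap.
    for name, span in UNICODE_RANGES_COMBINED.items():
        if code_point in span:
            return name
    return "Unknown"

# unicode_range_of(ord("б")) -> "Cyrillic"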
@ -0,0 +1,95 @@
import warnings
from typing import Dict, Optional, Union

from .api import from_bytes, from_fp, from_path, normalize
from .constant import CHARDET_CORRESPONDENCE
from .models import CharsetMatch, CharsetMatches


def detect(byte_str: bytes) -> Dict[str, Optional[Union[str, float]]]:
    """
    chardet legacy method
    Detect the encoding of the given byte string. It should be mostly backward-compatible.
    Encoding names will match Chardet's own spelling whenever possible (not for encoding names it does not support).
    This function is deprecated and should only be used to migrate your project easily; consult the documentation for
    further information. Not planned for removal.

    :param byte_str: The byte sequence to examine.
    """
    if not isinstance(byte_str, (bytearray, bytes)):
        raise TypeError(  # pragma: nocover
            "Expected object of type bytes or bytearray, got: "
            "{0}".format(type(byte_str))
        )

    if isinstance(byte_str, bytearray):
        byte_str = bytes(byte_str)

    r = from_bytes(byte_str).best()

    encoding = r.encoding if r is not None else None
    language = r.language if r is not None and r.language != "Unknown" else ""
    confidence = 1.0 - r.chaos if r is not None else None

    # Note: CharsetNormalizer does not return 'UTF-8-SIG' as the sig gets stripped in the detection/normalization process
    # but chardet does return 'utf-8-sig' and it is a valid codec name.
    if r is not None and encoding == "utf_8" and r.bom:
        encoding += "_sig"

    return {
        "encoding": encoding
        if encoding not in CHARDET_CORRESPONDENCE
        else CHARDET_CORRESPONDENCE[encoding],
        "language": language,
        "confidence": confidence,
    }


class CharsetNormalizerMatch(CharsetMatch):
    pass


class CharsetNormalizerMatches(CharsetMatches):
    @staticmethod
    def from_fp(*args, **kwargs):  # type: ignore
        warnings.warn(  # pragma: nocover
            "staticmethod from_fp, from_bytes, from_path and normalize are deprecated "
            "and scheduled to be removed in 3.0",
            DeprecationWarning,
        )
        return from_fp(*args, **kwargs)  # pragma: nocover

    @staticmethod
    def from_bytes(*args, **kwargs):  # type: ignore
        warnings.warn(  # pragma: nocover
            "staticmethod from_fp, from_bytes, from_path and normalize are deprecated "
            "and scheduled to be removed in 3.0",
            DeprecationWarning,
        )
        return from_bytes(*args, **kwargs)  # pragma: nocover

    @staticmethod
    def from_path(*args, **kwargs):  # type: ignore
        warnings.warn(  # pragma: nocover
            "staticmethod from_fp, from_bytes, from_path and normalize are deprecated "
            "and scheduled to be removed in 3.0",
            DeprecationWarning,
        )
        return from_path(*args, **kwargs)  # pragma: nocover

    @staticmethod
    def normalize(*args, **kwargs):  # type: ignore
        warnings.warn(  # pragma: nocover
            "staticmethod from_fp, from_bytes, from_path and normalize are deprecated "
            "and scheduled to be removed in 3.0",
            DeprecationWarning,
        )
        return normalize(*args, **kwargs)  # pragma: nocover


class CharsetDetector(CharsetNormalizerMatches):
    pass


class CharsetDoctor(CharsetNormalizerMatches):
    pass
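A minimal sketch of the drop-in usage this shim targets; the returned values are illustrative, not real output:

# result = detect("héllo wörld".encode("utf-8"))
# e.g. {'encoding': 'utf-8', 'language': '', 'confidence': 0.99}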
@ -0,0 +1,553 @@
from functools import lru_cache
from typing import List, Optional

from .constant import COMMON_SAFE_ASCII_CHARACTERS, UNICODE_SECONDARY_RANGE_KEYWORD
from .utils import (
    is_accentuated,
    is_ascii,
    is_case_variable,
    is_cjk,
    is_emoticon,
    is_hangul,
    is_hiragana,
    is_katakana,
    is_latin,
    is_punctuation,
    is_separator,
    is_symbol,
    is_thai,
    is_unprintable,
    remove_accent,
    unicode_range,
)


class MessDetectorPlugin:
    """
    Base abstract class used for mess detection plugins.
    All detectors MUST extend and implement given methods.
    """

    def eligible(self, character: str) -> bool:
        """
        Determine if given character should be fed in.
        """
        raise NotImplementedError  # pragma: nocover

    def feed(self, character: str) -> None:
        """
        The main routine to be executed upon character.
        Insert the logic by which the text would be considered chaotic.
        """
        raise NotImplementedError  # pragma: nocover

    def reset(self) -> None:  # pragma: no cover
        """
        Permit to reset the plugin to the initial state.
        """
        raise NotImplementedError

    @property
    def ratio(self) -> float:
        """
        Compute the chaos ratio based on what your feed() has seen.
        Must NOT be lower than 0.; there is no upper restriction.
        """
        raise NotImplementedError  # pragma: nocover
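# A hypothetical plugin, to illustrate the contract above (not part of the
# library; left commented out because mess_ratio() instantiates every concrete
# subclass of MessDetectorPlugin automatically):
#
# class TooManyQuestionMarksPlugin(MessDetectorPlugin):
#     def __init__(self) -> None:
#         self._count: int = 0
#         self._question_marks: int = 0
#
#     def eligible(self, character: str) -> bool:
#         return character.isprintable()
#
#     def feed(self, character: str) -> None:
#         self._count += 1
#         if character == "?":
#             self._question_marks += 1
#
#     def reset(self) -> None:
#         self._count = 0
#         self._question_marks = 0
#
#     @property
#     def ratio(self) -> float:
#         if self._count == 0:
#             return 0.0
#         return self._question_marks / self._count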

class TooManySymbolOrPunctuationPlugin(MessDetectorPlugin):
    def __init__(self) -> None:
        self._punctuation_count: int = 0
        self._symbol_count: int = 0
        self._character_count: int = 0

        self._last_printable_char: Optional[str] = None
        self._frenzy_symbol_in_word: bool = False

    def eligible(self, character: str) -> bool:
        return character.isprintable()

    def feed(self, character: str) -> None:
        self._character_count += 1

        if (
            character != self._last_printable_char
            and character not in COMMON_SAFE_ASCII_CHARACTERS
        ):
            if is_punctuation(character):
                self._punctuation_count += 1
            elif (
                character.isdigit() is False
                and is_symbol(character)
                and is_emoticon(character) is False
            ):
                self._symbol_count += 2

        self._last_printable_char = character

    def reset(self) -> None:  # pragma: no cover
        self._punctuation_count = 0
        self._character_count = 0
        self._symbol_count = 0

    @property
    def ratio(self) -> float:
        if self._character_count == 0:
            return 0.0

        ratio_of_punctuation: float = (
            self._punctuation_count + self._symbol_count
        ) / self._character_count

        return ratio_of_punctuation if ratio_of_punctuation >= 0.3 else 0.0


class TooManyAccentuatedPlugin(MessDetectorPlugin):
    def __init__(self) -> None:
        self._character_count: int = 0
        self._accentuated_count: int = 0

    def eligible(self, character: str) -> bool:
        return character.isalpha()

    def feed(self, character: str) -> None:
        self._character_count += 1

        if is_accentuated(character):
            self._accentuated_count += 1

    def reset(self) -> None:  # pragma: no cover
        self._character_count = 0
        self._accentuated_count = 0

    @property
    def ratio(self) -> float:
        if self._character_count == 0:
            return 0.0
        ratio_of_accentuation: float = self._accentuated_count / self._character_count
        return ratio_of_accentuation if ratio_of_accentuation >= 0.35 else 0.0


class UnprintablePlugin(MessDetectorPlugin):
    def __init__(self) -> None:
        self._unprintable_count: int = 0
        self._character_count: int = 0

    def eligible(self, character: str) -> bool:
        return True

    def feed(self, character: str) -> None:
        if is_unprintable(character):
            self._unprintable_count += 1
        self._character_count += 1

    def reset(self) -> None:  # pragma: no cover
        self._unprintable_count = 0

    @property
    def ratio(self) -> float:
        if self._character_count == 0:
            return 0.0

        return (self._unprintable_count * 8) / self._character_count


class SuspiciousDuplicateAccentPlugin(MessDetectorPlugin):
    def __init__(self) -> None:
        self._successive_count: int = 0
        self._character_count: int = 0

        self._last_latin_character: Optional[str] = None

    def eligible(self, character: str) -> bool:
        return character.isalpha() and is_latin(character)

    def feed(self, character: str) -> None:
        self._character_count += 1
        if (
            self._last_latin_character is not None
            and is_accentuated(character)
            and is_accentuated(self._last_latin_character)
        ):
            if character.isupper() and self._last_latin_character.isupper():
                self._successive_count += 1
            # Worse if it's the same char duplicated with a different accent.
            if remove_accent(character) == remove_accent(self._last_latin_character):
                self._successive_count += 1
        self._last_latin_character = character

    def reset(self) -> None:  # pragma: no cover
        self._successive_count = 0
        self._character_count = 0
        self._last_latin_character = None

    @property
    def ratio(self) -> float:
        if self._character_count == 0:
            return 0.0

        return (self._successive_count * 2) / self._character_count


class SuspiciousRange(MessDetectorPlugin):
    def __init__(self) -> None:
        self._suspicious_successive_range_count: int = 0
        self._character_count: int = 0
        self._last_printable_seen: Optional[str] = None

    def eligible(self, character: str) -> bool:
        return character.isprintable()

    def feed(self, character: str) -> None:
        self._character_count += 1

        if (
            character.isspace()
            or is_punctuation(character)
            or character in COMMON_SAFE_ASCII_CHARACTERS
        ):
            self._last_printable_seen = None
            return

        if self._last_printable_seen is None:
            self._last_printable_seen = character
            return

        unicode_range_a: Optional[str] = unicode_range(self._last_printable_seen)
        unicode_range_b: Optional[str] = unicode_range(character)

        if is_suspiciously_successive_range(unicode_range_a, unicode_range_b):
            self._suspicious_successive_range_count += 1

        self._last_printable_seen = character

    def reset(self) -> None:  # pragma: no cover
        self._character_count = 0
        self._suspicious_successive_range_count = 0
        self._last_printable_seen = None

    @property
    def ratio(self) -> float:
        if self._character_count == 0:
            return 0.0

        ratio_of_suspicious_range_usage: float = (
            self._suspicious_successive_range_count * 2
        ) / self._character_count

        if ratio_of_suspicious_range_usage < 0.1:
            return 0.0

        return ratio_of_suspicious_range_usage


class SuperWeirdWordPlugin(MessDetectorPlugin):
    def __init__(self) -> None:
        self._word_count: int = 0
        self._bad_word_count: int = 0
        self._foreign_long_count: int = 0

        self._is_current_word_bad: bool = False
        self._foreign_long_watch: bool = False

        self._character_count: int = 0
        self._bad_character_count: int = 0

        self._buffer: str = ""
        self._buffer_accent_count: int = 0

    def eligible(self, character: str) -> bool:
        return True

    def feed(self, character: str) -> None:
        if character.isalpha():
            self._buffer += character
            if is_accentuated(character):
                self._buffer_accent_count += 1
            if (
                self._foreign_long_watch is False
                and (is_latin(character) is False or is_accentuated(character))
                and is_cjk(character) is False
                and is_hangul(character) is False
                and is_katakana(character) is False
                and is_hiragana(character) is False
                and is_thai(character) is False
            ):
                self._foreign_long_watch = True
            return
        if not self._buffer:
            return
        if (
            character.isspace() or is_punctuation(character) or is_separator(character)
        ) and self._buffer:
            self._word_count += 1
            buffer_length: int = len(self._buffer)

            self._character_count += buffer_length

            if buffer_length >= 4:
                if self._buffer_accent_count / buffer_length > 0.34:
                    self._is_current_word_bad = True
                # Words/buffers ending with an upper-case accentuated letter are so rare
                # that we consider them all suspicious. Same weight as foreign_long suspicious.
                if is_accentuated(self._buffer[-1]) and self._buffer[-1].isupper():
                    self._foreign_long_count += 1
                    self._is_current_word_bad = True
            if buffer_length >= 24 and self._foreign_long_watch:
                self._foreign_long_count += 1
                self._is_current_word_bad = True

            if self._is_current_word_bad:
                self._bad_word_count += 1
                self._bad_character_count += len(self._buffer)
                self._is_current_word_bad = False

            self._foreign_long_watch = False
            self._buffer = ""
            self._buffer_accent_count = 0
        elif (
            character not in {"<", ">", "-", "=", "~", "|", "_"}
            and character.isdigit() is False
            and is_symbol(character)
        ):
            self._is_current_word_bad = True
            self._buffer += character

    def reset(self) -> None:  # pragma: no cover
        self._buffer = ""
        self._is_current_word_bad = False
        self._foreign_long_watch = False
        self._bad_word_count = 0
        self._word_count = 0
        self._character_count = 0
        self._bad_character_count = 0
        self._foreign_long_count = 0

    @property
    def ratio(self) -> float:
        if self._word_count <= 10 and self._foreign_long_count == 0:
            return 0.0

        return self._bad_character_count / self._character_count


class CjkInvalidStopPlugin(MessDetectorPlugin):
    """
    GB (Chinese) based encodings often render the full stop incorrectly when the content does not fit,
    and this can be easily detected. Searching for the overuse of '丅' and '丄'.
    """

    def __init__(self) -> None:
        self._wrong_stop_count: int = 0
        self._cjk_character_count: int = 0

    def eligible(self, character: str) -> bool:
        return True

    def feed(self, character: str) -> None:
        if character in {"丅", "丄"}:
            self._wrong_stop_count += 1
            return
        if is_cjk(character):
            self._cjk_character_count += 1

    def reset(self) -> None:  # pragma: no cover
        self._wrong_stop_count = 0
        self._cjk_character_count = 0

    @property
    def ratio(self) -> float:
        if self._cjk_character_count < 16:
            return 0.0
        return self._wrong_stop_count / self._cjk_character_count
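# Illustration (hypothetical sample): in GB-family mojibake a sentence like
# "天气很好丅他来了丅" would feed two wrong stops against seven CJK characters,
# pushing this plugin's ratio up once at least 16 CJK characters have been seen.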

class ArchaicUpperLowerPlugin(MessDetectorPlugin):
    def __init__(self) -> None:
        self._buf: bool = False

        self._character_count_since_last_sep: int = 0

        self._successive_upper_lower_count: int = 0
        self._successive_upper_lower_count_final: int = 0

        self._character_count: int = 0

        self._last_alpha_seen: Optional[str] = None
        self._current_ascii_only: bool = True

    def eligible(self, character: str) -> bool:
        return True

    def feed(self, character: str) -> None:
        is_concerned = character.isalpha() and is_case_variable(character)
        chunk_sep = is_concerned is False

        if chunk_sep and self._character_count_since_last_sep > 0:
            if (
                self._character_count_since_last_sep <= 64
                and character.isdigit() is False
                and self._current_ascii_only is False
            ):
                self._successive_upper_lower_count_final += (
                    self._successive_upper_lower_count
                )

            self._successive_upper_lower_count = 0
            self._character_count_since_last_sep = 0
            self._last_alpha_seen = None
            self._buf = False
            self._character_count += 1
            self._current_ascii_only = True

            return

        if self._current_ascii_only is True and is_ascii(character) is False:
            self._current_ascii_only = False

        if self._last_alpha_seen is not None:
            if (character.isupper() and self._last_alpha_seen.islower()) or (
                character.islower() and self._last_alpha_seen.isupper()
            ):
                if self._buf is True:
                    self._successive_upper_lower_count += 2
                    self._buf = False
                else:
                    self._buf = True
            else:
                self._buf = False

        self._character_count += 1
        self._character_count_since_last_sep += 1
        self._last_alpha_seen = character

    def reset(self) -> None:  # pragma: no cover
        self._character_count = 0
        self._character_count_since_last_sep = 0
        self._successive_upper_lower_count = 0
        self._successive_upper_lower_count_final = 0
        self._last_alpha_seen = None
        self._buf = False
        self._current_ascii_only = True

    @property
    def ratio(self) -> float:
        if self._character_count == 0:
            return 0.0

        return self._successive_upper_lower_count_final / self._character_count


@lru_cache(maxsize=1024)
def is_suspiciously_successive_range(
    unicode_range_a: Optional[str], unicode_range_b: Optional[str]
) -> bool:
    """
    Determine if two Unicode ranges seen next to each other can be considered as suspicious.
    """
    if unicode_range_a is None or unicode_range_b is None:
        return True

    if unicode_range_a == unicode_range_b:
        return False

    if "Latin" in unicode_range_a and "Latin" in unicode_range_b:
        return False

    if "Emoticons" in unicode_range_a or "Emoticons" in unicode_range_b:
        return False

    # Latin characters can be accompanied with a combining diacritical mark
    # e.g. Vietnamese.
    if ("Latin" in unicode_range_a or "Latin" in unicode_range_b) and (
        "Combining" in unicode_range_a or "Combining" in unicode_range_b
    ):
        return False

    keywords_range_a, keywords_range_b = unicode_range_a.split(
        " "
    ), unicode_range_b.split(" ")

    for el in keywords_range_a:
        if el in UNICODE_SECONDARY_RANGE_KEYWORD:
            continue
        if el in keywords_range_b:
            return False

    # Japanese Exception
    range_a_jp_chars, range_b_jp_chars = (
        unicode_range_a
        in (
            "Hiragana",
            "Katakana",
        ),
        unicode_range_b in ("Hiragana", "Katakana"),
    )
    if (range_a_jp_chars or range_b_jp_chars) and (
        "CJK" in unicode_range_a or "CJK" in unicode_range_b
    ):
        return False
    if range_a_jp_chars and range_b_jp_chars:
        return False

    if "Hangul" in unicode_range_a or "Hangul" in unicode_range_b:
        if "CJK" in unicode_range_a or "CJK" in unicode_range_b:
            return False
        if unicode_range_a == "Basic Latin" or unicode_range_b == "Basic Latin":
            return False

    # Chinese/Japanese use dedicated range for punctuation and/or separators.
    if ("CJK" in unicode_range_a or "CJK" in unicode_range_b) or (
        unicode_range_a in ["Katakana", "Hiragana"]
        and unicode_range_b in ["Katakana", "Hiragana"]
    ):
        if "Punctuation" in unicode_range_a or "Punctuation" in unicode_range_b:
            return False
        if "Forms" in unicode_range_a or "Forms" in unicode_range_b:
            return False

    return True


@lru_cache(maxsize=2048)
def mess_ratio(
    decoded_sequence: str, maximum_threshold: float = 0.2, debug: bool = False
) -> float:
    """
    Compute a mess ratio given a decoded bytes sequence. The maximum threshold stops the computation early.
    """

    detectors: List[MessDetectorPlugin] = [
        md_class() for md_class in MessDetectorPlugin.__subclasses__()
    ]

    length: int = len(decoded_sequence) + 1

    mean_mess_ratio: float = 0.0

    if length < 512:
        intermediary_mean_mess_ratio_calc: int = 32
    elif length <= 1024:
        intermediary_mean_mess_ratio_calc = 64
    else:
        intermediary_mean_mess_ratio_calc = 128

    for character, index in zip(decoded_sequence + "\n", range(length)):
        for detector in detectors:
            if detector.eligible(character):
                detector.feed(character)

        if (
            index > 0 and index % intermediary_mean_mess_ratio_calc == 0
        ) or index == length - 1:
            mean_mess_ratio = sum(dt.ratio for dt in detectors)

            if mean_mess_ratio >= maximum_threshold:
                break

    if debug:
        for dt in detectors:  # pragma: nocover
            print(dt.__class__, dt.ratio)

    return round(mean_mess_ratio, 3)
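A quick sketch of calling the scorer above; the scores are illustrative, not measured output:

# mess_ratio("Hello, how are you today?")        # clean text, close to 0.0
# mess_ratio("ÃƒÂ©ÃƒÂ¨ mojibake Ã¢â‚¬â„¢ везде")  # garbled mix, noticeably higher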
@ -0,0 +1,401 @@
|
|||||||
|
import warnings
|
||||||
|
from collections import Counter
|
||||||
|
from encodings.aliases import aliases
|
||||||
|
from hashlib import sha256
|
||||||
|
from json import dumps
|
||||||
|
from re import sub
|
||||||
|
from typing import (
|
||||||
|
Any,
|
||||||
|
Counter as TypeCounter,
|
||||||
|
Dict,
|
||||||
|
Iterator,
|
||||||
|
List,
|
||||||
|
Optional,
|
||||||
|
Tuple,
|
||||||
|
Union,
|
||||||
|
)
|
||||||
|
|
||||||
|
from .constant import NOT_PRINTABLE_PATTERN, TOO_BIG_SEQUENCE
|
||||||
|
from .md import mess_ratio
|
||||||
|
from .utils import iana_name, is_multi_byte_encoding, unicode_range
|
||||||
|
|
||||||
|
|
||||||
|
class CharsetMatch:
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
payload: bytes,
|
||||||
|
guessed_encoding: str,
|
||||||
|
mean_mess_ratio: float,
|
||||||
|
has_sig_or_bom: bool,
|
||||||
|
languages: "CoherenceMatches",
|
||||||
|
decoded_payload: Optional[str] = None,
|
||||||
|
):
|
||||||
|
self._payload: bytes = payload
|
||||||
|
|
||||||
|
self._encoding: str = guessed_encoding
|
||||||
|
self._mean_mess_ratio: float = mean_mess_ratio
|
||||||
|
self._languages: CoherenceMatches = languages
|
||||||
|
self._has_sig_or_bom: bool = has_sig_or_bom
|
||||||
|
self._unicode_ranges: Optional[List[str]] = None
|
||||||
|
|
||||||
|
self._leaves: List[CharsetMatch] = []
|
||||||
|
self._mean_coherence_ratio: float = 0.0
|
||||||
|
|
||||||
|
self._output_payload: Optional[bytes] = None
|
||||||
|
self._output_encoding: Optional[str] = None
|
||||||
|
|
||||||
|
self._string: Optional[str] = decoded_payload
|
||||||
|
|
||||||
|
def __eq__(self, other: object) -> bool:
|
||||||
|
if not isinstance(other, CharsetMatch):
|
||||||
|
raise TypeError(
|
||||||
|
"__eq__ cannot be invoked on {} and {}.".format(
|
||||||
|
str(other.__class__), str(self.__class__)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
return self.encoding == other.encoding and self.fingerprint == other.fingerprint
|
||||||
|
|
||||||
|
def __lt__(self, other: object) -> bool:
|
||||||
|
"""
|
||||||
|
Implemented to make sorted available upon CharsetMatches items.
|
||||||
|
"""
|
||||||
|
if not isinstance(other, CharsetMatch):
|
||||||
|
raise ValueError
|
||||||
|
|
||||||
|
chaos_difference: float = abs(self.chaos - other.chaos)
|
||||||
|
coherence_difference: float = abs(self.coherence - other.coherence)
|
||||||
|
|
||||||
|
# Bellow 1% difference --> Use Coherence
|
||||||
|
if chaos_difference < 0.01 and coherence_difference > 0.02:
|
||||||
|
# When having a tough decision, use the result that decoded as many multi-byte as possible.
|
||||||
|
if chaos_difference == 0.0 and self.coherence == other.coherence:
|
||||||
|
return self.multi_byte_usage > other.multi_byte_usage
|
||||||
|
return self.coherence > other.coherence
|
||||||
|
|
||||||
|
return self.chaos < other.chaos
|
||||||
|
|
||||||
|
@property
|
||||||
|
def multi_byte_usage(self) -> float:
|
||||||
|
return 1.0 - len(str(self)) / len(self.raw)
|
||||||
|
|
||||||
|
    @property
    def chaos_secondary_pass(self) -> float:
        """
        Check the chaos (mess) of the decoded text once more, this time over the full content.
        Use with caution, this can be very slow.
        Notice: Will be removed in 3.0
        """
        warnings.warn(
            "chaos_secondary_pass is deprecated and will be removed in 3.0",
            DeprecationWarning,
        )
        return mess_ratio(str(self), 1.0)

    @property
    def coherence_non_latin(self) -> float:
        """
        Coherence ratio on the first non-Latin language detected, if any.
        Notice: Will be removed in 3.0
        """
        warnings.warn(
            "coherence_non_latin is deprecated and will be removed in 3.0",
            DeprecationWarning,
        )
        return 0.0

    @property
    def w_counter(self) -> TypeCounter[str]:
        """
        Word counter instance on decoded text.
        Notice: Will be removed in 3.0
        """
        warnings.warn(
            "w_counter is deprecated and will be removed in 3.0", DeprecationWarning
        )

        string_printable_only = sub(NOT_PRINTABLE_PATTERN, " ", str(self).lower())

        return Counter(string_printable_only.split())

    def __str__(self) -> str:
        # Lazy Str Loading
        if self._string is None:
            self._string = str(self._payload, self._encoding, "strict")
        return self._string

    def __repr__(self) -> str:
        return "<CharsetMatch '{}' bytes({})>".format(self.encoding, self.fingerprint)

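    # Illustrative note (not part of the original diff): decoding is lazy, so the
    # first str(match) pays the decoding cost and later calls reuse the cache:
    #
    #   text = str(match)  # decodes self._payload once, in strict mode
    #   text = str(match)  # returns the cached self._string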
    def add_submatch(self, other: "CharsetMatch") -> None:
        if not isinstance(other, CharsetMatch) or other == self:
            raise ValueError(
                "Unable to add instance <{}> as a submatch of a CharsetMatch".format(
                    other.__class__
                )
            )

        other._string = None  # Unload RAM usage; dirty trick.
        self._leaves.append(other)

    @property
    def encoding(self) -> str:
        return self._encoding

    @property
    def encoding_aliases(self) -> List[str]:
        """
        An encoding is known by many names; this can help when e.g. searching for IBM855 while it is listed as CP855.
        """
        also_known_as: List[str] = []
        for u, p in aliases.items():
            if self.encoding == u:
                also_known_as.append(p)
            elif self.encoding == p:
                also_known_as.append(u)
        return also_known_as

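    # Illustrative note (not part of the original diff): the aliases mapping from
    # encodings.aliases is scanned in both directions, so a match whose encoding
    # is "cp855" would list "ibm855" (and "855") among its encoding_aliases.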
    @property
    def bom(self) -> bool:
        return self._has_sig_or_bom

    @property
    def byte_order_mark(self) -> bool:
        return self._has_sig_or_bom

    @property
    def languages(self) -> List[str]:
        """
        Return the complete list of possible languages found in the decoded sequence.
        Usually not very useful; the returned list may be empty even if the 'language' property returns something
        other than 'Unknown'.
        """
        return [e[0] for e in self._languages]

    @property
    def language(self) -> str:
        """
        Most probable language found in the decoded sequence. If none was detected or inferred, the property will
        return "Unknown".
        """
        if not self._languages:
            # Trying to infer the language based on the given encoding.
            # It's either English or we should not pronounce ourselves in certain cases.
            if "ascii" in self.could_be_from_charset:
                return "English"

            # done here to avoid a circular import
            from charset_normalizer.cd import encoding_languages, mb_encoding_languages

            languages = (
                mb_encoding_languages(self.encoding)
                if is_multi_byte_encoding(self.encoding)
                else encoding_languages(self.encoding)
            )

            if len(languages) == 0 or "Latin Based" in languages:
                return "Unknown"

            return languages[0]

        return self._languages[0][0]

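    # Illustrative note (not part of the original diff): when no language was
    # measured, the fallback relies on the encoding itself. A pure-ASCII payload
    # reports "English"; a hypothetical match guessed as a multi-byte encoding
    # such as "big5" would instead go through mb_encoding_languages("big5") and
    # report the first language associated with that encoding.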
    @property
    def chaos(self) -> float:
        return self._mean_mess_ratio

    @property
    def coherence(self) -> float:
        if not self._languages:
            return 0.0
        return self._languages[0][1]

    @property
    def percent_chaos(self) -> float:
        return round(self.chaos * 100, ndigits=3)

    @property
    def percent_coherence(self) -> float:
        return round(self.coherence * 100, ndigits=3)

    @property
    def raw(self) -> bytes:
        """
        Original untouched bytes.
        """
        return self._payload

    @property
    def submatch(self) -> List["CharsetMatch"]:
        return self._leaves

    @property
    def has_submatch(self) -> bool:
        return len(self._leaves) > 0

    @property
    def alphabets(self) -> List[str]:
        if self._unicode_ranges is not None:
            return self._unicode_ranges
        # list detected ranges
        detected_ranges: List[Optional[str]] = [
            unicode_range(char) for char in str(self)
        ]
        # filter and sort
        self._unicode_ranges = sorted(list({r for r in detected_ranges if r}))
        return self._unicode_ranges

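    # Illustrative note (not part of the original diff): this reports the sorted
    # set of Unicode ranges touched by the decoded text. A hypothetical payload
    # mixing "abc" with Cyrillic letters would yield something like
    # ['Basic Latin', 'Cyrillic'], cached in _unicode_ranges afterwards.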
    @property
    def could_be_from_charset(self) -> List[str]:
        """
        The complete list of encodings that output the exact SAME str result and therefore could be the originating
        encoding.
        This list does include the encoding available in the 'encoding' property.
        """
        return [self._encoding] + [m.encoding for m in self._leaves]

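    # Illustrative sketch (not part of the original diff): since several charsets
    # can decode to the exact same text, a membership test is safer than comparing
    # match.encoding against a single name:
    #
    #   if "ascii" in match.could_be_from_charset:  # hypothetical match object
    #       ...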
    def first(self) -> "CharsetMatch":
        """
        Kept for BC reasons. Will be removed in 3.0.
        """
        return self

    def best(self) -> "CharsetMatch":
        """
        Kept for BC reasons. Will be removed in 3.0.
        """
        return self

    def output(self, encoding: str = "utf_8") -> bytes:
        """
        Method to get the re-encoded bytes payload using the given target encoding. Defaults to UTF-8.
        Characters that the target encoding cannot represent are replaced by the encoder (errors="replace").
        """
        if self._output_encoding is None or self._output_encoding != encoding:
            self._output_encoding = encoding
            self._output_payload = str(self).encode(encoding, "replace")

        return self._output_payload  # type: ignore

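    # Illustrative sketch (not part of the original diff), assuming a match object
    # obtained elsewhere: output() re-encodes the decoded text and caches the
    # result until a different target encoding is requested.
    #
    #   utf8_bytes = match.output()            # defaults to "utf_8"
    #   latin_bytes = match.output("latin_1")  # unrepresentable characters become "?"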
    @property
    def fingerprint(self) -> str:
        """
        Retrieve the unique SHA256 computed using the transformed (re-encoded) payload. Not the original one.
        """
        return sha256(self.output()).hexdigest()


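# Illustrative note (not part of the original diff): because the fingerprint
# hashes the re-encoded payload rather than the raw bytes, two matches decoding
# to the same text share a fingerprint, which is what CharsetMatches.append()
# below relies on to fold duplicates in as submatches.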
class CharsetMatches:
    """
    Container holding every CharsetMatch item, ordered by default from the most probable to the least.
    Acts like a list (iterable) but does not implement all related methods.
    """

    def __init__(self, results: Optional[List[CharsetMatch]] = None):
        self._results: List[CharsetMatch] = sorted(results) if results else []

    def __iter__(self) -> Iterator[CharsetMatch]:
        yield from self._results

    def __getitem__(self, item: Union[int, str]) -> CharsetMatch:
        """
        Retrieve a single item either by its position or by its encoding name (an alias may be used here).
        Raises KeyError upon an invalid index or an encoding not present in the results.
        """
        if isinstance(item, int):
            return self._results[item]
        if isinstance(item, str):
            item = iana_name(item, False)
            for result in self._results:
                if item in result.could_be_from_charset:
                    return result
        raise KeyError

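    # Illustrative sketch (not part of the original diff), assuming a populated
    # CharsetMatches instance: items can be fetched by position or by an
    # IANA-normalized encoding name.
    #
    #   first_match = matches[0]       # positional lookup
    #   same_match = matches["utf-8"]  # normalized to "utf_8" via iana_name()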
    def __len__(self) -> int:
        return len(self._results)

    def __bool__(self) -> bool:
        return len(self._results) > 0

    def append(self, item: CharsetMatch) -> None:
        """
        Insert a single match. It will be inserted so as to preserve the sort order.
        It can be inserted as a submatch.
        """
        if not isinstance(item, CharsetMatch):
            raise ValueError(
                "Cannot append instance '{}' to CharsetMatches".format(
                    str(item.__class__)
                )
            )
        # We should disable the submatch factoring when the input file is too heavy (to conserve RAM usage)
        if len(item.raw) <= TOO_BIG_SEQUENCE:
            for match in self._results:
                if match.fingerprint == item.fingerprint and match.chaos == item.chaos:
                    match.add_submatch(item)
                    return
        self._results.append(item)
        self._results = sorted(self._results)

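    # Illustrative note (not part of the original diff): for payloads no larger
    # than TOO_BIG_SEQUENCE, an appended match whose fingerprint and chaos equal
    # those of an existing result is folded in as a submatch rather than becoming
    # a new entry; every genuine insertion triggers a re-sort.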
    def best(self) -> Optional["CharsetMatch"]:
        """
        Simply return the first match. Strictly equivalent to matches[0].
        """
        if not self._results:
            return None
        return self._results[0]

    def first(self) -> Optional["CharsetMatch"]:
        """
        Redundant method; simply calls best(). Kept for BC reasons.
        """
        return self.best()


CoherenceMatch = Tuple[str, float]
CoherenceMatches = List[CoherenceMatch]


class CliDetectionResult:
    def __init__(
        self,
        path: str,
        encoding: Optional[str],
        encoding_aliases: List[str],
        alternative_encodings: List[str],
        language: str,
        alphabets: List[str],
        has_sig_or_bom: bool,
        chaos: float,
        coherence: float,
        unicode_path: Optional[str],
        is_preferred: bool,
    ):
        self.path: str = path
        self.unicode_path: Optional[str] = unicode_path
        self.encoding: Optional[str] = encoding
        self.encoding_aliases: List[str] = encoding_aliases
        self.alternative_encodings: List[str] = alternative_encodings
        self.language: str = language
        self.alphabets: List[str] = alphabets
        self.has_sig_or_bom: bool = has_sig_or_bom
        self.chaos: float = chaos
        self.coherence: float = coherence
        self.is_preferred: bool = is_preferred

    @property
    def __dict__(self) -> Dict[str, Any]:  # type: ignore
        return {
            "path": self.path,
            "encoding": self.encoding,
            "encoding_aliases": self.encoding_aliases,
            "alternative_encodings": self.alternative_encodings,
            "language": self.language,
            "alphabets": self.alphabets,
            "has_sig_or_bom": self.has_sig_or_bom,
            "chaos": self.chaos,
            "coherence": self.coherence,
            "unicode_path": self.unicode_path,
            "is_preferred": self.is_preferred,
        }

    def to_json(self) -> str:
        return dumps(self.__dict__, ensure_ascii=True, indent=4)
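
# Illustrative sketch (not part of the original diff) of how CliDetectionResult
# serializes: the __dict__ property feeds json.dumps, so to_json() returns a
# pretty-printed, ASCII-safe report. All field values below are made up.
#
#   result = CliDetectionResult(
#       path="sample.txt", encoding="utf_8", encoding_aliases=[],
#       alternative_encodings=[], language="English", alphabets=["Basic Latin"],
#       has_sig_or_bom=False, chaos=0.0, coherence=0.6,
#       unicode_path=None, is_preferred=True,
#   )
#   print(result.to_json())  # {"path": "sample.txt", "encoding": "utf_8", ...}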