You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
47 lines
1.1 KiB
Python
47 lines
1.1 KiB
Python
10 years ago
|
#!/usr/bin/env python
|
||
|
"""
|
||
|
Script which takes one or more file paths and reports on their detected
|
||
|
encodings
|
||
|
|
||
|
Example::
|
||
|
|
||
|
% chardetect somefile someotherfile
|
||
|
somefile: windows-1252 with confidence 0.5
|
||
|
someotherfile: ascii with confidence 1.0
|
||
|
|
||
|
If no paths are provided, it takes its input from stdin.
|
||
|
|
||
|
"""
|
||
|
from io import open
|
||
|
from sys import argv, stdin
|
||
|
|
||
|
from chardet.universaldetector import UniversalDetector
|
||
|
|
||
|
|
||
|
def description_of(file, name='stdin'):
    """Return a string describing the probable encoding of a file.

    :param file: Iterable yielding chunks (typically lines) of the data to
        inspect; each chunk is handed unchanged to
        ``UniversalDetector.feed``.
    :param name: Label used as the prefix of the returned description.
    :returns: ``'<name>: <encoding> with confidence <confidence>'`` when the
        detector reports an encoding, otherwise ``'<name>: no result'``.
    """
    detector = UniversalDetector()
    for line in file:
        detector.feed(line)
        # Follow chardet's documented incremental-usage pattern: once the
        # detector flags itself as done there is no need to keep reading.
        if detector.done:
            break
    detector.close()

    result = detector.result
    if result['encoding']:
        return '%s: %s with confidence %s' % (name,
                                              result['encoding'],
                                              result['confidence'])
    return '%s: no result' % name
|
||
|
|
||
|
|
||
|
def main():
    """Print the detected encoding of each path in ``argv``, or of stdin.

    With no command-line arguments the data is read from standard input;
    otherwise every argument is opened in binary mode and described in turn,
    one line of output per input.
    """
    if len(argv) <= 1:
        # Paths below are read in binary mode, so stdin must be read as bytes
        # as well. On Python 3 ``sys.stdin`` is a text stream whose raw bytes
        # are exposed as ``stdin.buffer``; where that attribute is absent
        # (Python 2) fall back to ``stdin`` itself.
        print(description_of(getattr(stdin, 'buffer', stdin)))
    else:
        for path in argv[1:]:
            with open(path, 'rb') as f:
                print(description_of(f, path))
|
||
|
|
||
|
|
||
|
# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == '__main__':
    main()
|