You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

4591 lines
153 KiB
Python

#!/usr/bin/env python
# Copyright (C) 2010-2011 Hideo Hattori
# Copyright (C) 2011-2013 Hideo Hattori, Steven Myint
# Copyright (C) 2013-2016 Hideo Hattori, Steven Myint, Bill Wendling
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# Copyright (C) 2006-2009 Johann C. Rocholl <johann@rocholl.net>
# Copyright (C) 2009-2013 Florent Xicluna <florent.xicluna@gmail.com>
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
"""Automatically formats Python code to conform to the PEP 8 style guide.
Fixes that only need be done once can be added by adding a function of the form
"fix_<code>(source)" to this module. They should return the fixed source code.
These fixes are picked up by apply_global_fixes().
Fixes that depend on pycodestyle should be added as methods to FixPEP8. See the
class documentation for more information.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import codecs
import collections
import copy
import difflib
import fnmatch
import inspect
import io
import itertools
import keyword
import locale
import os
import re
import signal
import sys
import textwrap
import token
import tokenize
import warnings
import ast
from configparser import ConfigParser as SafeConfigParser, Error
import pycodestyle
from pycodestyle import STARTSWITH_INDENT_STATEMENT_REGEX
__version__ = '1.7.0'
CR = '\r'
LF = '\n'
CRLF = '\r\n'
PYTHON_SHEBANG_REGEX = re.compile(r'^#!.*\bpython[23]?\b\s*$')
LAMBDA_REGEX = re.compile(r'([\w.]+)\s=\slambda\s*([)(=\w,\s.]*):')
COMPARE_NEGATIVE_REGEX = re.compile(r'\b(not)\s+([^][)(}{]+?)\s+(in|is)\s')
COMPARE_NEGATIVE_REGEX_THROUGH = re.compile(r'\b(not\s+in|is\s+not)\s')
BARE_EXCEPT_REGEX = re.compile(r'except\s*:')
STARTSWITH_DEF_REGEX = re.compile(r'^(async\s+def|def)\s.*\):')
DOCSTRING_START_REGEX = re.compile(r'^u?r?(?P<kind>["\']{3})')
ENABLE_REGEX = re.compile(r'# *(fmt|autopep8): *on')
DISABLE_REGEX = re.compile(r'# *(fmt|autopep8): *off')
EXIT_CODE_OK = 0
EXIT_CODE_ERROR = 1
EXIT_CODE_EXISTS_DIFF = 2
EXIT_CODE_ARGPARSE_ERROR = 99
# For generating line shortening candidates.
SHORTEN_OPERATOR_GROUPS = frozenset([
frozenset([',']),
frozenset(['%']),
frozenset([',', '(', '[', '{']),
frozenset(['%', '(', '[', '{']),
frozenset([',', '(', '[', '{', '%', '+', '-', '*', '/', '//']),
frozenset(['%', '+', '-', '*', '/', '//']),
])
DEFAULT_IGNORE = 'E226,E24,W50,W690' # TODO: use pycodestyle.DEFAULT_IGNORE
DEFAULT_INDENT_SIZE = 4
# these fixes conflict with each other, if the `--ignore` setting causes both
# to be enabled, disable both of them
CONFLICTING_CODES = ('W503', 'W504')
SELECTED_GLOBAL_FIXED_METHOD_CODES = ['W602', ]
# W602 is handled separately due to the need to avoid "with_traceback".
CODE_TO_2TO3 = {
'E231': ['ws_comma'],
'E721': ['idioms'],
'W601': ['has_key'],
'W603': ['ne'],
'W604': ['repr'],
'W690': ['apply',
'except',
'exitfunc',
'numliterals',
'operator',
'paren',
'reduce',
'renames',
'standarderror',
'sys_exc',
'throw',
'tuple_params',
'xreadlines']}
if sys.platform == 'win32': # pragma: no cover
DEFAULT_CONFIG = os.path.expanduser(r'~\.pycodestyle')
else:
DEFAULT_CONFIG = os.path.join(os.getenv('XDG_CONFIG_HOME') or
os.path.expanduser('~/.config'),
'pycodestyle')
# fallback, use .pep8
if not os.path.exists(DEFAULT_CONFIG): # pragma: no cover
if sys.platform == 'win32':
DEFAULT_CONFIG = os.path.expanduser(r'~\.pep8')
else:
DEFAULT_CONFIG = os.path.join(os.path.expanduser('~/.config'), 'pep8')
PROJECT_CONFIG = ('setup.cfg', 'tox.ini', '.pep8', '.flake8')
MAX_PYTHON_FILE_DETECTION_BYTES = 1024
def open_with_encoding(filename, mode='r', encoding=None, limit_byte_check=-1):
"""Return opened file with a specific encoding."""
if not encoding:
encoding = detect_encoding(filename, limit_byte_check=limit_byte_check)
return io.open(filename, mode=mode, encoding=encoding,
newline='') # Preserve line endings
def detect_encoding(filename, limit_byte_check=-1):
"""Return file encoding."""
try:
with open(filename, 'rb') as input_file:
from lib2to3.pgen2 import tokenize as lib2to3_tokenize
encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0]
with open_with_encoding(filename, encoding=encoding) as test_file:
test_file.read(limit_byte_check)
return encoding
except (LookupError, SyntaxError, UnicodeDecodeError):
return 'latin-1'
def readlines_from_file(filename):
"""Return contents of file."""
with open_with_encoding(filename) as input_file:
return input_file.readlines()
def extended_blank_lines(logical_line,
blank_lines,
blank_before,
indent_level,
previous_logical):
"""Check for missing blank lines after class declaration."""
if previous_logical.startswith('def '):
if blank_lines and pycodestyle.DOCSTRING_REGEX.match(logical_line):
yield (0, 'E303 too many blank lines ({})'.format(blank_lines))
elif pycodestyle.DOCSTRING_REGEX.match(previous_logical):
# Missing blank line between class docstring and method declaration.
if (
indent_level and
not blank_lines and
not blank_before and
logical_line.startswith(('def ')) and
'(self' in logical_line
):
yield (0, 'E301 expected 1 blank line, found 0')
pycodestyle.register_check(extended_blank_lines)
def continued_indentation(logical_line, tokens, indent_level, hang_closing,
indent_char, noqa):
"""Override pycodestyle's function to provide indentation information."""
first_row = tokens[0][2][0]
nrows = 1 + tokens[-1][2][0] - first_row
if noqa or nrows == 1:
return
# indent_next tells us whether the next block is indented. Assuming
# that it is indented by 4 spaces, then we should not allow 4-space
# indents on the final continuation line. In turn, some other
# indents are allowed to have an extra 4 spaces.
indent_next = logical_line.endswith(':')
row = depth = 0
valid_hangs = (
(DEFAULT_INDENT_SIZE,)
if indent_char != '\t' else (DEFAULT_INDENT_SIZE,
2 * DEFAULT_INDENT_SIZE)
)
# Remember how many brackets were opened on each line.
parens = [0] * nrows
# Relative indents of physical lines.
rel_indent = [0] * nrows
# For each depth, collect a list of opening rows.
open_rows = [[0]]
# For each depth, memorize the hanging indentation.
hangs = [None]
# Visual indents.
indent_chances = {}
last_indent = tokens[0][2]
indent = [last_indent[1]]
last_token_multiline = None
line = None
last_line = ''
last_line_begins_with_multiline = False
for token_type, text, start, end, line in tokens:
newline = row < start[0] - first_row
if newline:
row = start[0] - first_row
newline = (not last_token_multiline and
token_type not in (tokenize.NL, tokenize.NEWLINE))
last_line_begins_with_multiline = last_token_multiline
if newline:
# This is the beginning of a continuation line.
last_indent = start
# Record the initial indent.
rel_indent[row] = pycodestyle.expand_indent(line) - indent_level
# Identify closing bracket.
close_bracket = (token_type == tokenize.OP and text in ']})')
# Is the indent relative to an opening bracket line?
for open_row in reversed(open_rows[depth]):
hang = rel_indent[row] - rel_indent[open_row]
hanging_indent = hang in valid_hangs
if hanging_indent:
break
if hangs[depth]:
hanging_indent = (hang == hangs[depth])
visual_indent = (not close_bracket and hang > 0 and
indent_chances.get(start[1]))
if close_bracket and indent[depth]:
# Closing bracket for visual indent.
if start[1] != indent[depth]:
yield (start, 'E124 {}'.format(indent[depth]))
elif close_bracket and not hang:
# closing bracket matches indentation of opening bracket's line
if hang_closing:
yield (start, 'E133 {}'.format(indent[depth]))
elif indent[depth] and start[1] < indent[depth]:
if visual_indent is not True:
# Visual indent is broken.
yield (start, 'E128 {}'.format(indent[depth]))
elif (hanging_indent or
(indent_next and
rel_indent[row] == 2 * DEFAULT_INDENT_SIZE)):
# Hanging indent is verified.
if close_bracket and not hang_closing:
yield (start, 'E123 {}'.format(indent_level +
rel_indent[open_row]))
hangs[depth] = hang
elif visual_indent is True:
# Visual indent is verified.
indent[depth] = start[1]
elif visual_indent in (text, str):
# Ignore token lined up with matching one from a previous line.
pass
else:
one_indented = (indent_level + rel_indent[open_row] +
DEFAULT_INDENT_SIZE)
# Indent is broken.
if hang <= 0:
error = ('E122', one_indented)
elif indent[depth]:
error = ('E127', indent[depth])
elif not close_bracket and hangs[depth]:
error = ('E131', one_indented)
elif hang > DEFAULT_INDENT_SIZE:
error = ('E126', one_indented)
else:
hangs[depth] = hang
error = ('E121', one_indented)
yield (start, '{} {}'.format(*error))
# Look for visual indenting.
if (
parens[row] and
token_type not in (tokenize.NL, tokenize.COMMENT) and
not indent[depth]
):
indent[depth] = start[1]
indent_chances[start[1]] = True
# Deal with implicit string concatenation.
elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
text in ('u', 'ur', 'b', 'br')):
indent_chances[start[1]] = str
# Special case for the "if" statement because len("if (") is equal to
# 4.
elif not indent_chances and not row and not depth and text == 'if':
indent_chances[end[1] + 1] = True
elif text == ':' and line[end[1]:].isspace():
open_rows[depth].append(row)
# Keep track of bracket depth.
if token_type == tokenize.OP:
if text in '([{':
depth += 1
indent.append(0)
hangs.append(None)
if len(open_rows) == depth:
open_rows.append([])
open_rows[depth].append(row)
parens[row] += 1
elif text in ')]}' and depth > 0:
# Parent indents should not be more than this one.
prev_indent = indent.pop() or last_indent[1]
hangs.pop()
for d in range(depth):
if indent[d] > prev_indent:
indent[d] = 0
for ind in list(indent_chances):
if ind >= prev_indent:
del indent_chances[ind]
del open_rows[depth + 1:]
depth -= 1
if depth:
indent_chances[indent[depth]] = True
for idx in range(row, -1, -1):
if parens[idx]:
parens[idx] -= 1
break
assert len(indent) == depth + 1
if (
start[1] not in indent_chances and
# This is for purposes of speeding up E121 (GitHub #90).
not last_line.rstrip().endswith(',')
):
# Allow to line up tokens.
indent_chances[start[1]] = text
last_token_multiline = (start[0] != end[0])
if last_token_multiline:
rel_indent[end[0] - first_row] = rel_indent[row]
last_line = line
if (
indent_next and
not last_line_begins_with_multiline and
pycodestyle.expand_indent(line) == indent_level + DEFAULT_INDENT_SIZE
):
pos = (start[0], indent[0] + 4)
desired_indent = indent_level + 2 * DEFAULT_INDENT_SIZE
if visual_indent:
yield (pos, 'E129 {}'.format(desired_indent))
else:
yield (pos, 'E125 {}'.format(desired_indent))
del pycodestyle._checks['logical_line'][pycodestyle.continued_indentation]
pycodestyle.register_check(continued_indentation)
class FixPEP8(object):
"""Fix invalid code.
Fixer methods are prefixed "fix_". The _fix_source() method looks for these
automatically.
The fixer method can take either one or two arguments (in addition to
self). The first argument is "result", which is the error information from
pycodestyle. The second argument, "logical", is required only for
logical-line fixes.
The fixer method can return the list of modified lines or None. An empty
list would mean that no changes were made. None would mean that only the
line reported in the pycodestyle error was modified. Note that the modified
line numbers that are returned are indexed at 1. This typically would
correspond with the line number reported in the pycodestyle error
information.
[fixed method list]
- e111,e114,e115,e116
- e121,e122,e123,e124,e125,e126,e127,e128,e129
- e201,e202,e203
- e211
- e221,e222,e223,e224,e225
- e231
- e251,e252
- e261,e262
- e271,e272,e273,e274,e275
- e301,e302,e303,e304,e305,e306
- e401,e402
- e502
- e701,e702,e703,e704
- e711,e712,e713,e714
- e722
- e731
- w291
- w503,504
"""
def __init__(self, filename,
options,
contents=None,
long_line_ignore_cache=None):
self.filename = filename
if contents is None:
self.source = readlines_from_file(filename)
else:
sio = io.StringIO(contents)
self.source = sio.readlines()
self.options = options
self.indent_word = _get_indentword(''.join(self.source))
# collect imports line
self.imports = {}
for i, line in enumerate(self.source):
if (line.find("import ") == 0 or line.find("from ") == 0) and \
line not in self.imports:
# collect only import statements that first appeared
self.imports[line] = i
self.long_line_ignore_cache = (
set() if long_line_ignore_cache is None
else long_line_ignore_cache)
# Many fixers are the same even though pycodestyle categorizes them
# differently.
self.fix_e115 = self.fix_e112
self.fix_e121 = self._fix_reindent
self.fix_e122 = self._fix_reindent
self.fix_e123 = self._fix_reindent
self.fix_e124 = self._fix_reindent
self.fix_e126 = self._fix_reindent
self.fix_e127 = self._fix_reindent
self.fix_e128 = self._fix_reindent
self.fix_e129 = self._fix_reindent
self.fix_e133 = self.fix_e131
self.fix_e202 = self.fix_e201
self.fix_e203 = self.fix_e201
self.fix_e211 = self.fix_e201
self.fix_e221 = self.fix_e271
self.fix_e222 = self.fix_e271
self.fix_e223 = self.fix_e271
self.fix_e226 = self.fix_e225
self.fix_e227 = self.fix_e225
self.fix_e228 = self.fix_e225
self.fix_e241 = self.fix_e271
self.fix_e242 = self.fix_e224
self.fix_e252 = self.fix_e225
self.fix_e261 = self.fix_e262
self.fix_e272 = self.fix_e271
self.fix_e273 = self.fix_e271
self.fix_e274 = self.fix_e271
self.fix_e275 = self.fix_e271
self.fix_e306 = self.fix_e301
self.fix_e501 = (
self.fix_long_line_logically if
options and (options.aggressive >= 2 or options.experimental) else
self.fix_long_line_physically)
self.fix_e703 = self.fix_e702
self.fix_w292 = self.fix_w291
self.fix_w293 = self.fix_w291
def _fix_source(self, results):
try:
(logical_start, logical_end) = _find_logical(self.source)
logical_support = True
except (SyntaxError, tokenize.TokenError): # pragma: no cover
logical_support = False
completed_lines = set()
for result in sorted(results, key=_priority_key):
if result['line'] in completed_lines:
continue
fixed_methodname = 'fix_' + result['id'].lower()
if hasattr(self, fixed_methodname):
fix = getattr(self, fixed_methodname)
line_index = result['line'] - 1
original_line = self.source[line_index]
is_logical_fix = len(_get_parameters(fix)) > 2
if is_logical_fix:
logical = None
if logical_support:
logical = _get_logical(self.source,
result,
logical_start,
logical_end)
if logical and set(range(
logical[0][0] + 1,
logical[1][0] + 1)).intersection(
completed_lines):
continue
modified_lines = fix(result, logical)
else:
modified_lines = fix(result)
if modified_lines is None:
# Force logical fixes to report what they modified.
assert not is_logical_fix
if self.source[line_index] == original_line:
modified_lines = []
if modified_lines:
completed_lines.update(modified_lines)
elif modified_lines == []: # Empty list means no fix
if self.options.verbose >= 2:
print(
'---> Not fixing {error} on line {line}'.format(
error=result['id'], line=result['line']),
file=sys.stderr)
else: # We assume one-line fix when None.
completed_lines.add(result['line'])
else:
if self.options.verbose >= 3:
print(
"---> '{}' is not defined.".format(fixed_methodname),
file=sys.stderr)
info = result['info'].strip()
print('---> {}:{}:{}:{}'.format(self.filename,
result['line'],
result['column'],
info),
file=sys.stderr)
def fix(self):
"""Return a version of the source code with PEP 8 violations fixed."""
pep8_options = {
'ignore': self.options.ignore,
'select': self.options.select,
'max_line_length': self.options.max_line_length,
'hang_closing': self.options.hang_closing,
}
results = _execute_pep8(pep8_options, self.source)
if self.options.verbose:
progress = {}
for r in results:
if r['id'] not in progress:
progress[r['id']] = set()
progress[r['id']].add(r['line'])
print('---> {n} issue(s) to fix {progress}'.format(
n=len(results), progress=progress), file=sys.stderr)
if self.options.line_range:
start, end = self.options.line_range
results = [r for r in results
if start <= r['line'] <= end]
self._fix_source(filter_results(source=''.join(self.source),
results=results,
aggressive=self.options.aggressive))
if self.options.line_range:
# If number of lines has changed then change line_range.
count = sum(sline.count('\n')
for sline in self.source[start - 1:end])
self.options.line_range[1] = start + count - 1
return ''.join(self.source)
def _fix_reindent(self, result):
"""Fix a badly indented line.
This is done by adding or removing from its initial indent only.
"""
num_indent_spaces = int(result['info'].split()[1])
line_index = result['line'] - 1
target = self.source[line_index]
self.source[line_index] = ' ' * num_indent_spaces + target.lstrip()
def fix_e112(self, result):
"""Fix under-indented comments."""
line_index = result['line'] - 1
target = self.source[line_index]
if not target.lstrip().startswith('#'):
# Don't screw with invalid syntax.
return []
self.source[line_index] = self.indent_word + target
def fix_e113(self, result):
"""Fix unexpected indentation."""
line_index = result['line'] - 1
target = self.source[line_index]
indent = _get_indentation(target)
stripped = target.lstrip()
self.source[line_index] = indent[1:] + stripped
def fix_e116(self, result):
"""Fix over-indented comments."""
line_index = result['line'] - 1
target = self.source[line_index]
indent = _get_indentation(target)
stripped = target.lstrip()
if not stripped.startswith('#'):
# Don't screw with invalid syntax.
return []
self.source[line_index] = indent[1:] + stripped
def fix_e117(self, result):
"""Fix over-indented."""
line_index = result['line'] - 1
target = self.source[line_index]
indent = _get_indentation(target)
if indent == '\t':
return []
stripped = target.lstrip()
self.source[line_index] = indent[1:] + stripped
def fix_e125(self, result):
"""Fix indentation undistinguish from the next logical line."""
num_indent_spaces = int(result['info'].split()[1])
line_index = result['line'] - 1
target = self.source[line_index]
spaces_to_add = num_indent_spaces - len(_get_indentation(target))
indent = len(_get_indentation(target))
modified_lines = []
while len(_get_indentation(self.source[line_index])) >= indent:
self.source[line_index] = (' ' * spaces_to_add +
self.source[line_index])
modified_lines.append(1 + line_index) # Line indexed at 1.
line_index -= 1
return modified_lines
def fix_e131(self, result):
"""Fix indentation undistinguish from the next logical line."""
num_indent_spaces = int(result['info'].split()[1])
line_index = result['line'] - 1
target = self.source[line_index]
spaces_to_add = num_indent_spaces - len(_get_indentation(target))
indent_length = len(_get_indentation(target))
spaces_to_add = num_indent_spaces - indent_length
if num_indent_spaces == 0 and indent_length == 0:
spaces_to_add = 4
if spaces_to_add >= 0:
self.source[line_index] = (' ' * spaces_to_add +
self.source[line_index])
else:
offset = abs(spaces_to_add)
self.source[line_index] = self.source[line_index][offset:]
def fix_e201(self, result):
"""Remove extraneous whitespace."""
line_index = result['line'] - 1
target = self.source[line_index]
offset = result['column'] - 1
fixed = fix_whitespace(target,
offset=offset,
replacement='')
self.source[line_index] = fixed
def fix_e224(self, result):
"""Remove extraneous whitespace around operator."""
target = self.source[result['line'] - 1]
offset = result['column'] - 1
fixed = target[:offset] + target[offset:].replace('\t', ' ')
self.source[result['line'] - 1] = fixed
def fix_e225(self, result):
"""Fix missing whitespace around operator."""
target = self.source[result['line'] - 1]
offset = result['column'] - 1
fixed = target[:offset] + ' ' + target[offset:]
# Only proceed if non-whitespace characters match.
# And make sure we don't break the indentation.
if (
fixed.replace(' ', '') == target.replace(' ', '') and
_get_indentation(fixed) == _get_indentation(target)
):
self.source[result['line'] - 1] = fixed
error_code = result.get('id', 0)
try:
ts = generate_tokens(fixed)
except (SyntaxError, tokenize.TokenError):
return
if not check_syntax(fixed.lstrip()):
return
errors = list(
pycodestyle.missing_whitespace_around_operator(fixed, ts))
for e in reversed(errors):
if error_code != e[1].split()[0]:
continue
offset = e[0][1]
fixed = fixed[:offset] + ' ' + fixed[offset:]
self.source[result['line'] - 1] = fixed
else:
return []
def fix_e231(self, result):
"""Add missing whitespace."""
line_index = result['line'] - 1
target = self.source[line_index]
offset = result['column']
fixed = target[:offset].rstrip() + ' ' + target[offset:].lstrip()
self.source[line_index] = fixed
def fix_e251(self, result):
"""Remove whitespace around parameter '=' sign."""
line_index = result['line'] - 1
target = self.source[line_index]
# This is necessary since pycodestyle sometimes reports columns that
# goes past the end of the physical line. This happens in cases like,
# foo(bar\n=None)
c = min(result['column'] - 1,
len(target) - 1)
if target[c].strip():
fixed = target
else:
fixed = target[:c].rstrip() + target[c:].lstrip()
# There could be an escaped newline
#
# def foo(a=\
# 1)
if fixed.endswith(('=\\\n', '=\\\r\n', '=\\\r')):
self.source[line_index] = fixed.rstrip('\n\r \t\\')
self.source[line_index + 1] = self.source[line_index + 1].lstrip()
return [line_index + 1, line_index + 2] # Line indexed at 1
self.source[result['line'] - 1] = fixed
def fix_e262(self, result):
"""Fix spacing after comment hash."""
target = self.source[result['line'] - 1]
offset = result['column']
code = target[:offset].rstrip(' \t#')
comment = target[offset:].lstrip(' \t#')
fixed = code + (' # ' + comment if comment.strip() else '\n')
self.source[result['line'] - 1] = fixed
def fix_e271(self, result):
"""Fix extraneous whitespace around keywords."""
line_index = result['line'] - 1
target = self.source[line_index]
offset = result['column'] - 1
fixed = fix_whitespace(target,
offset=offset,
replacement=' ')
if fixed == target:
return []
else:
self.source[line_index] = fixed
def fix_e301(self, result):
"""Add missing blank line."""
cr = '\n'
self.source[result['line'] - 1] = cr + self.source[result['line'] - 1]
def fix_e302(self, result):
"""Add missing 2 blank lines."""
add_linenum = 2 - int(result['info'].split()[-1])
offset = 1
if self.source[result['line'] - 2].strip() == "\\":
offset = 2
cr = '\n' * add_linenum
self.source[result['line'] - offset] = (
cr + self.source[result['line'] - offset]
)
def fix_e303(self, result):
"""Remove extra blank lines."""
delete_linenum = int(result['info'].split('(')[1].split(')')[0]) - 2
delete_linenum = max(1, delete_linenum)
# We need to count because pycodestyle reports an offset line number if
# there are comments.
cnt = 0
line = result['line'] - 2
modified_lines = []
while cnt < delete_linenum and line >= 0:
if not self.source[line].strip():
self.source[line] = ''
modified_lines.append(1 + line) # Line indexed at 1
cnt += 1
line -= 1
return modified_lines
def fix_e304(self, result):
"""Remove blank line following function decorator."""
line = result['line'] - 2
if not self.source[line].strip():
self.source[line] = ''
def fix_e305(self, result):
"""Add missing 2 blank lines after end of function or class."""
add_delete_linenum = 2 - int(result['info'].split()[-1])
cnt = 0
offset = result['line'] - 2
modified_lines = []
if add_delete_linenum < 0:
# delete cr
add_delete_linenum = abs(add_delete_linenum)
while cnt < add_delete_linenum and offset >= 0:
if not self.source[offset].strip():
self.source[offset] = ''
modified_lines.append(1 + offset) # Line indexed at 1
cnt += 1
offset -= 1
else:
# add cr
cr = '\n'
# check comment line
while True:
if offset < 0:
break
line = self.source[offset].lstrip()
if not line:
break
if line[0] != '#':
break
offset -= 1
offset += 1
self.source[offset] = cr + self.source[offset]
modified_lines.append(1 + offset) # Line indexed at 1.
return modified_lines
def fix_e401(self, result):
"""Put imports on separate lines."""
line_index = result['line'] - 1
target = self.source[line_index]
offset = result['column'] - 1
if not target.lstrip().startswith('import'):
return []
indentation = re.split(pattern=r'\bimport\b',
string=target, maxsplit=1)[0]
fixed = (target[:offset].rstrip('\t ,') + '\n' +
indentation + 'import ' + target[offset:].lstrip('\t ,'))
self.source[line_index] = fixed
def fix_e402(self, result):
(line_index, offset, target) = get_index_offset_contents(result,
self.source)
for i in range(1, 100):
line = "".join(self.source[line_index:line_index+i])
try:
generate_tokens("".join(line))
except (SyntaxError, tokenize.TokenError):
continue
break
if not (target in self.imports and self.imports[target] != line_index):
mod_offset = get_module_imports_on_top_of_file(self.source,
line_index)
self.source[mod_offset] = line + self.source[mod_offset]
for offset in range(i):
self.source[line_index+offset] = ''
def fix_long_line_logically(self, result, logical):
"""Try to make lines fit within --max-line-length characters."""
if (
not logical or
len(logical[2]) == 1 or
self.source[result['line'] - 1].lstrip().startswith('#')
):
return self.fix_long_line_physically(result)
start_line_index = logical[0][0]
end_line_index = logical[1][0]
logical_lines = logical[2]
previous_line = get_item(self.source, start_line_index - 1, default='')
next_line = get_item(self.source, end_line_index + 1, default='')
single_line = join_logical_line(''.join(logical_lines))
try:
fixed = self.fix_long_line(
target=single_line,
previous_line=previous_line,
next_line=next_line,
original=''.join(logical_lines))
except (SyntaxError, tokenize.TokenError):
return self.fix_long_line_physically(result)
if fixed:
for line_index in range(start_line_index, end_line_index + 1):
self.source[line_index] = ''
self.source[start_line_index] = fixed
return range(start_line_index + 1, end_line_index + 1)
return []
def fix_long_line_physically(self, result):
"""Try to make lines fit within --max-line-length characters."""
line_index = result['line'] - 1
target = self.source[line_index]
previous_line = get_item(self.source, line_index - 1, default='')
next_line = get_item(self.source, line_index + 1, default='')
try:
fixed = self.fix_long_line(
target=target,
previous_line=previous_line,
next_line=next_line,
original=target)
except (SyntaxError, tokenize.TokenError):
return []
if fixed:
self.source[line_index] = fixed
return [line_index + 1]
return []
def fix_long_line(self, target, previous_line,
next_line, original):
cache_entry = (target, previous_line, next_line)
if cache_entry in self.long_line_ignore_cache:
return []
if target.lstrip().startswith('#'):
if self.options.aggressive:
# Wrap commented lines.
return shorten_comment(
line=target,
max_line_length=self.options.max_line_length,
last_comment=not next_line.lstrip().startswith('#'))
return []
fixed = get_fixed_long_line(
target=target,
previous_line=previous_line,
original=original,
indent_word=self.indent_word,
max_line_length=self.options.max_line_length,
aggressive=self.options.aggressive,
experimental=self.options.experimental,
verbose=self.options.verbose)
if fixed and not code_almost_equal(original, fixed):
return fixed
self.long_line_ignore_cache.add(cache_entry)
return None
def fix_e502(self, result):
"""Remove extraneous escape of newline."""
(line_index, _, target) = get_index_offset_contents(result,
self.source)
self.source[line_index] = target.rstrip('\n\r \t\\') + '\n'
def fix_e701(self, result):
"""Put colon-separated compound statement on separate lines."""
line_index = result['line'] - 1
target = self.source[line_index]
c = result['column']
fixed_source = (target[:c] + '\n' +
_get_indentation(target) + self.indent_word +
target[c:].lstrip('\n\r \t\\'))
self.source[result['line'] - 1] = fixed_source
return [result['line'], result['line'] + 1]
def fix_e702(self, result, logical):
"""Put semicolon-separated compound statement on separate lines."""
if not logical:
return [] # pragma: no cover
logical_lines = logical[2]
# Avoid applying this when indented.
# https://docs.python.org/reference/compound_stmts.html
for line in logical_lines:
if (result['id'] == 'E702' and ':' in line
and STARTSWITH_INDENT_STATEMENT_REGEX.match(line)):
if self.options.verbose:
print(
'---> avoid fixing {error} with '
'other compound statements'.format(error=result['id']),
file=sys.stderr
)
return []
line_index = result['line'] - 1
target = self.source[line_index]
if target.rstrip().endswith('\\'):
# Normalize '1; \\\n2' into '1; 2'.
self.source[line_index] = target.rstrip('\n \r\t\\')
self.source[line_index + 1] = self.source[line_index + 1].lstrip()
return [line_index + 1, line_index + 2]
if target.rstrip().endswith(';'):
self.source[line_index] = target.rstrip('\n \r\t;') + '\n'
return [line_index + 1]
offset = result['column'] - 1
first = target[:offset].rstrip(';').rstrip()
second = (_get_indentation(logical_lines[0]) +
target[offset:].lstrip(';').lstrip())
# Find inline comment.
inline_comment = None
if target[offset:].lstrip(';').lstrip()[:2] == '# ':
inline_comment = target[offset:].lstrip(';')
if inline_comment:
self.source[line_index] = first + inline_comment
else:
self.source[line_index] = first + '\n' + second
return [line_index + 1]
def fix_e704(self, result):
"""Fix multiple statements on one line def"""
(line_index, _, target) = get_index_offset_contents(result,
self.source)
match = STARTSWITH_DEF_REGEX.match(target)
if match:
self.source[line_index] = '{}\n{}{}'.format(
match.group(0),
_get_indentation(target) + self.indent_word,
target[match.end(0):].lstrip())
def fix_e711(self, result):
"""Fix comparison with None."""
(line_index, offset, target) = get_index_offset_contents(result,
self.source)
right_offset = offset + 2
if right_offset >= len(target):
return []
left = target[:offset].rstrip()
center = target[offset:right_offset]
right = target[right_offset:].lstrip()
if center.strip() == '==':
new_center = 'is'
elif center.strip() == '!=':
new_center = 'is not'
else:
return []
self.source[line_index] = ' '.join([left, new_center, right])
def fix_e712(self, result):
"""Fix (trivial case of) comparison with boolean."""
(line_index, offset, target) = get_index_offset_contents(result,
self.source)
# Handle very easy "not" special cases.
if re.match(r'^\s*if [\w."\'\[\]]+ == False:$', target):
self.source[line_index] = re.sub(r'if ([\w."\'\[\]]+) == False:',
r'if not \1:', target, count=1)
elif re.match(r'^\s*if [\w."\'\[\]]+ != True:$', target):
self.source[line_index] = re.sub(r'if ([\w."\'\[\]]+) != True:',
r'if not \1:', target, count=1)
else:
right_offset = offset + 2
if right_offset >= len(target):
return []
left = target[:offset].rstrip()
center = target[offset:right_offset]
right = target[right_offset:].lstrip()
# Handle simple cases only.
new_right = None
if center.strip() == '==':
if re.match(r'\bTrue\b', right):
new_right = re.sub(r'\bTrue\b *', '', right, count=1)
elif center.strip() == '!=':
if re.match(r'\bFalse\b', right):
new_right = re.sub(r'\bFalse\b *', '', right, count=1)
if new_right is None:
return []
if new_right[0].isalnum():
new_right = ' ' + new_right
self.source[line_index] = left + new_right
def fix_e713(self, result):
"""Fix (trivial case of) non-membership check."""
(line_index, offset, target) = get_index_offset_contents(result,
self.source)
# to convert once 'not in' -> 'in'
before_target = target[:offset]
target = target[offset:]
match_notin = COMPARE_NEGATIVE_REGEX_THROUGH.search(target)
notin_pos_start, notin_pos_end = 0, 0
if match_notin:
notin_pos_start = match_notin.start(1)
notin_pos_end = match_notin.end()
target = '{}{} {}'.format(
target[:notin_pos_start], 'in', target[notin_pos_end:])
# fix 'not in'
match = COMPARE_NEGATIVE_REGEX.search(target)
if match:
if match.group(3) == 'in':
pos_start = match.start(1)
new_target = '{5}{0}{1} {2} {3} {4}'.format(
target[:pos_start], match.group(2), match.group(1),
match.group(3), target[match.end():], before_target)
if match_notin:
# revert 'in' -> 'not in'
pos_start = notin_pos_start + offset
pos_end = notin_pos_end + offset - 4 # len('not ')
new_target = '{}{} {}'.format(
new_target[:pos_start], 'not in', new_target[pos_end:])
self.source[line_index] = new_target
def fix_e714(self, result):
"""Fix object identity should be 'is not' case."""
(line_index, offset, target) = get_index_offset_contents(result,
self.source)
# to convert once 'is not' -> 'is'
before_target = target[:offset]
target = target[offset:]
match_isnot = COMPARE_NEGATIVE_REGEX_THROUGH.search(target)
isnot_pos_start, isnot_pos_end = 0, 0
if match_isnot:
isnot_pos_start = match_isnot.start(1)
isnot_pos_end = match_isnot.end()
target = '{}{} {}'.format(
target[:isnot_pos_start], 'in', target[isnot_pos_end:])
match = COMPARE_NEGATIVE_REGEX.search(target)
if match:
if match.group(3).startswith('is'):
pos_start = match.start(1)
new_target = '{5}{0}{1} {2} {3} {4}'.format(
target[:pos_start], match.group(2), match.group(3),
match.group(1), target[match.end():], before_target)
if match_isnot:
# revert 'is' -> 'is not'
pos_start = isnot_pos_start + offset
pos_end = isnot_pos_end + offset - 4 # len('not ')
new_target = '{}{} {}'.format(
new_target[:pos_start], 'is not', new_target[pos_end:])
self.source[line_index] = new_target
def fix_e722(self, result):
"""fix bare except"""
(line_index, _, target) = get_index_offset_contents(result,
self.source)
match = BARE_EXCEPT_REGEX.search(target)
if match:
self.source[line_index] = '{}{}{}'.format(
target[:result['column'] - 1], "except BaseException:",
target[match.end():])
def fix_e731(self, result):
"""Fix do not assign a lambda expression check."""
(line_index, _, target) = get_index_offset_contents(result,
self.source)
match = LAMBDA_REGEX.search(target)
if match:
end = match.end()
self.source[line_index] = '{}def {}({}): return {}'.format(
target[:match.start(0)], match.group(1), match.group(2),
target[end:].lstrip())
def fix_w291(self, result):
"""Remove trailing whitespace."""
fixed_line = self.source[result['line'] - 1].rstrip()
self.source[result['line'] - 1] = fixed_line + '\n'
def fix_w391(self, _):
"""Remove trailing blank lines."""
blank_count = 0
for line in reversed(self.source):
line = line.rstrip()
if line:
break
else:
blank_count += 1
original_length = len(self.source)
self.source = self.source[:original_length - blank_count]
return range(1, 1 + original_length)
def fix_w503(self, result):
(line_index, _, target) = get_index_offset_contents(result,
self.source)
one_string_token = target.split()[0]
try:
ts = generate_tokens(one_string_token)
except (SyntaxError, tokenize.TokenError):
return
if not _is_binary_operator(ts[0][0], one_string_token):
return
# find comment
comment_index = 0
found_not_comment_only_line = False
comment_only_linenum = 0
for i in range(5):
# NOTE: try to parse code in 5 times
if (line_index - i) < 0:
break
from_index = line_index - i - 1
if from_index < 0 or len(self.source) <= from_index:
break
to_index = line_index + 1
strip_line = self.source[from_index].lstrip()
if (
not found_not_comment_only_line and
strip_line and strip_line[0] == '#'
):
comment_only_linenum += 1
continue
found_not_comment_only_line = True
try:
ts = generate_tokens("".join(self.source[from_index:to_index]))
except (SyntaxError, tokenize.TokenError):
continue
newline_count = 0
newline_index = []
for index, t in enumerate(ts):
if t[0] in (tokenize.NEWLINE, tokenize.NL):
newline_index.append(index)
newline_count += 1
if newline_count > 2:
tts = ts[newline_index[-3]:]
else:
tts = ts
old = []
for t in tts:
if t[0] in (tokenize.NEWLINE, tokenize.NL):
newline_count -= 1
if newline_count <= 1:
break
if tokenize.COMMENT == t[0] and old and old[0] != tokenize.NL:
comment_index = old[3][1]
break
old = t
break
i = target.index(one_string_token)
fix_target_line = line_index - 1 - comment_only_linenum
self.source[line_index] = '{}{}'.format(
target[:i], target[i + len(one_string_token):].lstrip())
nl = find_newline(self.source[fix_target_line:line_index])
before_line = self.source[fix_target_line]
bl = before_line.index(nl)
if comment_index:
self.source[fix_target_line] = '{} {} {}'.format(
before_line[:comment_index], one_string_token,
before_line[comment_index + 1:])
else:
if before_line[:bl].endswith("#"):
# special case
# see: https://github.com/hhatto/autopep8/issues/503
self.source[fix_target_line] = '{}{} {}'.format(
before_line[:bl-2], one_string_token, before_line[bl-2:])
else:
self.source[fix_target_line] = '{} {}{}'.format(
before_line[:bl], one_string_token, before_line[bl:])
def fix_w504(self, result):
(line_index, _, target) = get_index_offset_contents(result,
self.source)
# NOTE: is not collect pointed out in pycodestyle==2.4.0
comment_index = 0
operator_position = None # (start_position, end_position)
for i in range(1, 6):
to_index = line_index + i
try:
ts = generate_tokens("".join(self.source[line_index:to_index]))
except (SyntaxError, tokenize.TokenError):
continue
newline_count = 0
newline_index = []
for index, t in enumerate(ts):
if _is_binary_operator(t[0], t[1]):
if t[2][0] == 1 and t[3][0] == 1:
operator_position = (t[2][1], t[3][1])
elif t[0] == tokenize.NAME and t[1] in ("and", "or"):
if t[2][0] == 1 and t[3][0] == 1:
operator_position = (t[2][1], t[3][1])
elif t[0] in (tokenize.NEWLINE, tokenize.NL):
newline_index.append(index)
newline_count += 1
if newline_count > 2:
tts = ts[:newline_index[-3]]
else:
tts = ts
old = []
for t in tts:
if tokenize.COMMENT == t[0] and old:
comment_row, comment_index = old[3]
break
old = t
break
if not operator_position:
return
target_operator = target[operator_position[0]:operator_position[1]]
if comment_index and comment_row == 1:
self.source[line_index] = '{}{}'.format(
target[:operator_position[0]].rstrip(),
target[comment_index:])
else:
self.source[line_index] = '{}{}{}'.format(
target[:operator_position[0]].rstrip(),
target[operator_position[1]:].lstrip(),
target[operator_position[1]:])
next_line = self.source[line_index + 1]
next_line_indent = 0
m = re.match(r'\s*', next_line)
if m:
next_line_indent = m.span()[1]
self.source[line_index + 1] = '{}{} {}'.format(
next_line[:next_line_indent], target_operator,
next_line[next_line_indent:])
def fix_w605(self, result):
(line_index, offset, target) = get_index_offset_contents(result,
self.source)
self.source[line_index] = '{}\\{}'.format(
target[:offset + 1], target[offset + 1:])
def get_module_imports_on_top_of_file(source, import_line_index):
"""return import or from keyword position
example:
> 0: import sys
1: import os
2:
3: def function():
"""
def is_string_literal(line):
if line[0] in 'uUbB':
line = line[1:]
if line and line[0] in 'rR':
line = line[1:]
return line and (line[0] == '"' or line[0] == "'")
def is_future_import(line):
nodes = ast.parse(line)
for n in nodes.body:
if isinstance(n, ast.ImportFrom) and n.module == '__future__':
return True
return False
def has_future_import(source):
offset = 0
line = ''
for _, next_line in source:
for line_part in next_line.strip().splitlines(True):
line = line + line_part
try:
return is_future_import(line), offset
except SyntaxError:
continue
offset += 1
return False, offset
allowed_try_keywords = ('try', 'except', 'else', 'finally')
in_docstring = False
docstring_kind = '"""'
source_stream = iter(enumerate(source))
for cnt, line in source_stream:
if not in_docstring:
m = DOCSTRING_START_REGEX.match(line.lstrip())
if m is not None:
in_docstring = True
docstring_kind = m.group('kind')
remain = line[m.end(): m.endpos].rstrip()
if remain[-3:] == docstring_kind: # one line doc
in_docstring = False
continue
if in_docstring:
if line.rstrip()[-3:] == docstring_kind:
in_docstring = False
continue
if not line.rstrip():
continue
elif line.startswith('#'):
continue
if line.startswith('import '):
if cnt == import_line_index:
continue
return cnt
elif line.startswith('from '):
if cnt == import_line_index:
continue
hit, offset = has_future_import(
itertools.chain([(cnt, line)], source_stream)
)
if hit:
# move to the back
return cnt + offset + 1
return cnt
elif pycodestyle.DUNDER_REGEX.match(line):
return cnt
elif any(line.startswith(kw) for kw in allowed_try_keywords):
continue
elif is_string_literal(line):
return cnt
else:
return cnt
return 0
def get_index_offset_contents(result, source):
"""Return (line_index, column_offset, line_contents)."""
line_index = result['line'] - 1
return (line_index,
result['column'] - 1,
source[line_index])
def get_fixed_long_line(target, previous_line, original,
indent_word=' ', max_line_length=79,
aggressive=False, experimental=False, verbose=False):
"""Break up long line and return result.
Do this by generating multiple reformatted candidates and then
ranking the candidates to heuristically select the best option.
"""
indent = _get_indentation(target)
source = target[len(indent):]
assert source.lstrip() == source
assert not target.lstrip().startswith('#')
# Check for partial multiline.
tokens = list(generate_tokens(source))
candidates = shorten_line(
tokens, source, indent,
indent_word,
max_line_length,
aggressive=aggressive,
experimental=experimental,
previous_line=previous_line)
# Also sort alphabetically as a tie breaker (for determinism).
candidates = sorted(
sorted(set(candidates).union([target, original])),
key=lambda x: line_shortening_rank(
x,
indent_word,
max_line_length,
experimental=experimental))
if verbose >= 4:
print(('-' * 79 + '\n').join([''] + candidates + ['']),
file=wrap_output(sys.stderr, 'utf-8'))
if candidates:
best_candidate = candidates[0]
# Don't allow things to get longer.
if longest_line_length(best_candidate) > longest_line_length(original):
return None
return best_candidate
def longest_line_length(code):
"""Return length of longest line."""
if len(code) == 0:
return 0
return max(len(line) for line in code.splitlines())
def join_logical_line(logical_line):
"""Return single line based on logical line input."""
indentation = _get_indentation(logical_line)
return indentation + untokenize_without_newlines(
generate_tokens(logical_line.lstrip())) + '\n'
def untokenize_without_newlines(tokens):
"""Return source code based on tokens."""
text = ''
last_row = 0
last_column = -1
for t in tokens:
token_string = t[1]
(start_row, start_column) = t[2]
(end_row, end_column) = t[3]
if start_row > last_row:
last_column = 0
if (
(start_column > last_column or token_string == '\n') and
not text.endswith(' ')
):
text += ' '
if token_string != '\n':
text += token_string
last_row = end_row
last_column = end_column
return text.rstrip()
def _find_logical(source_lines):
# Make a variable which is the index of all the starts of lines.
logical_start = []
logical_end = []
last_newline = True
parens = 0
for t in generate_tokens(''.join(source_lines)):
if t[0] in [tokenize.COMMENT, tokenize.DEDENT,
tokenize.INDENT, tokenize.NL,
tokenize.ENDMARKER]:
continue
if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]:
last_newline = True
logical_end.append((t[3][0] - 1, t[2][1]))
continue
if last_newline and not parens:
logical_start.append((t[2][0] - 1, t[2][1]))
last_newline = False
if t[0] == tokenize.OP:
if t[1] in '([{':
parens += 1
elif t[1] in '}])':
parens -= 1
return (logical_start, logical_end)
def _get_logical(source_lines, result, logical_start, logical_end):
"""Return the logical line corresponding to the result.
Assumes input is already E702-clean.
"""
row = result['line'] - 1
col = result['column'] - 1
ls = None
le = None
for i in range(0, len(logical_start), 1):
assert logical_end
x = logical_end[i]
if x[0] > row or (x[0] == row and x[1] > col):
le = x
ls = logical_start[i]
break
if ls is None:
return None
original = source_lines[ls[0]:le[0] + 1]
return ls, le, original
def get_item(items, index, default=None):
if 0 <= index < len(items):
return items[index]
return default
def reindent(source, indent_size, leave_tabs=False):
"""Reindent all lines."""
reindenter = Reindenter(source, leave_tabs)
return reindenter.run(indent_size)
def code_almost_equal(a, b):
"""Return True if code is similar.
Ignore whitespace when comparing specific line.
"""
split_a = split_and_strip_non_empty_lines(a)
split_b = split_and_strip_non_empty_lines(b)
if len(split_a) != len(split_b):
return False
for (index, _) in enumerate(split_a):
if ''.join(split_a[index].split()) != ''.join(split_b[index].split()):
return False
return True
def split_and_strip_non_empty_lines(text):
"""Return lines split by newline.
Ignore empty lines.
"""
return [line.strip() for line in text.splitlines() if line.strip()]
def fix_e265(source, aggressive=False): # pylint: disable=unused-argument
"""Format block comments."""
if '#' not in source:
# Optimization.
return source
ignored_line_numbers = multiline_string_lines(
source,
include_docstrings=True) | set(commented_out_code_lines(source))
fixed_lines = []
sio = io.StringIO(source)
for (line_number, line) in enumerate(sio.readlines(), start=1):
if (
line.lstrip().startswith('#') and
line_number not in ignored_line_numbers and
not pycodestyle.noqa(line)
):
indentation = _get_indentation(line)
line = line.lstrip()
# Normalize beginning if not a shebang.
if len(line) > 1:
pos = next((index for index, c in enumerate(line)
if c != '#'))
if (
# Leave multiple spaces like '# ' alone.
(line[:pos].count('#') > 1 or line[1].isalnum() or
not line[1].isspace()) and
line[1] not in ':!' and
# Leave stylistic outlined blocks alone.
not line.rstrip().endswith('#')
):
line = '# ' + line.lstrip('# \t')
fixed_lines.append(indentation + line)
else:
fixed_lines.append(line)
return ''.join(fixed_lines)
def refactor(source, fixer_names, ignore=None, filename=''):
"""Return refactored code using lib2to3.
Skip if ignore string is produced in the refactored code.
"""
not_found_end_of_file_newline = source and source.rstrip("\r\n") == source
if not_found_end_of_file_newline:
input_source = source + "\n"
else:
input_source = source
from lib2to3 import pgen2
try:
new_text = refactor_with_2to3(input_source,
fixer_names=fixer_names,
filename=filename)
except (pgen2.parse.ParseError,
SyntaxError,
UnicodeDecodeError,
UnicodeEncodeError):
return source
if ignore:
if ignore in new_text and ignore not in source:
return source
if not_found_end_of_file_newline:
return new_text.rstrip("\r\n")
return new_text
def code_to_2to3(select, ignore, where='', verbose=False):
fixes = set()
for code, fix in CODE_TO_2TO3.items():
if code_match(code, select=select, ignore=ignore):
if verbose:
print('---> Applying {} fix for {}'.format(where,
code.upper()),
file=sys.stderr)
fixes |= set(fix)
return fixes
def fix_2to3(source,
aggressive=True, select=None, ignore=None, filename='',
where='global', verbose=False):
"""Fix various deprecated code (via lib2to3)."""
if not aggressive:
return source
select = select or []
ignore = ignore or []
return refactor(source,
code_to_2to3(select=select,
ignore=ignore,
where=where,
verbose=verbose),
filename=filename)
def fix_w602(source, aggressive=True):
"""Fix deprecated form of raising exception."""
if not aggressive:
return source
return refactor(source, ['raise'], ignore='with_traceback')
def find_newline(source):
"""Return type of newline used in source.
Input is a list of lines.
"""
assert not isinstance(source, str)
counter = collections.defaultdict(int)
for line in source:
if line.endswith(CRLF):
counter[CRLF] += 1
elif line.endswith(CR):
counter[CR] += 1
elif line.endswith(LF):
counter[LF] += 1
return (sorted(counter, key=counter.get, reverse=True) or [LF])[0]
def _get_indentword(source):
"""Return indentation type."""
indent_word = ' ' # Default in case source has no indentation
try:
for t in generate_tokens(source):
if t[0] == token.INDENT:
indent_word = t[1]
break
except (SyntaxError, tokenize.TokenError):
pass
return indent_word
def _get_indentation(line):
"""Return leading whitespace."""
if line.strip():
non_whitespace_index = len(line) - len(line.lstrip())
return line[:non_whitespace_index]
return ''
def get_diff_text(old, new, filename):
"""Return text of unified diff between old and new."""
newline = '\n'
diff = difflib.unified_diff(
old, new,
'original/' + filename,
'fixed/' + filename,
lineterm=newline)
text = ''
for line in diff:
text += line
# Work around missing newline (http://bugs.python.org/issue2142).
if text and not line.endswith(newline):
text += newline + r'\ No newline at end of file' + newline
return text
def _priority_key(pep8_result):
"""Key for sorting PEP8 results.
Global fixes should be done first. This is important for things like
indentation.
"""
priority = [
# Fix multiline colon-based before semicolon based.
'e701',
# Break multiline statements early.
'e702',
# Things that make lines longer.
'e225', 'e231',
# Remove extraneous whitespace before breaking lines.
'e201',
# Shorten whitespace in comment before resorting to wrapping.
'e262'
]
middle_index = 10000
lowest_priority = [
# We need to shorten lines last since the logical fixer can get in a
# loop, which causes us to exit early.
'e501',
]
key = pep8_result['id'].lower()
try:
return priority.index(key)
except ValueError:
try:
return middle_index + lowest_priority.index(key) + 1
except ValueError:
return middle_index
def shorten_line(tokens, source, indentation, indent_word, max_line_length,
aggressive=False, experimental=False, previous_line=''):
"""Separate line at OPERATOR.
Multiple candidates will be yielded.
"""
for candidate in _shorten_line(tokens=tokens,
source=source,
indentation=indentation,
indent_word=indent_word,
aggressive=aggressive,
previous_line=previous_line):
yield candidate
if aggressive:
for key_token_strings in SHORTEN_OPERATOR_GROUPS:
shortened = _shorten_line_at_tokens(
tokens=tokens,
source=source,
indentation=indentation,
indent_word=indent_word,
key_token_strings=key_token_strings,
aggressive=aggressive)
if shortened is not None and shortened != source:
yield shortened
if experimental:
for shortened in _shorten_line_at_tokens_new(
tokens=tokens,
source=source,
indentation=indentation,
max_line_length=max_line_length):
yield shortened
def _shorten_line(tokens, source, indentation, indent_word,
aggressive=False, previous_line=''):
"""Separate line at OPERATOR.
The input is expected to be free of newlines except for inside multiline
strings and at the end.
Multiple candidates will be yielded.
"""
for (token_type,
token_string,
start_offset,
end_offset) in token_offsets(tokens):
if (
token_type == tokenize.COMMENT and
not is_probably_part_of_multiline(previous_line) and
not is_probably_part_of_multiline(source) and
not source[start_offset + 1:].strip().lower().startswith(
('noqa', 'pragma:', 'pylint:'))
):
# Move inline comments to previous line.
first = source[:start_offset]
second = source[start_offset:]
yield (indentation + second.strip() + '\n' +
indentation + first.strip() + '\n')
elif token_type == token.OP and token_string != '=':
# Don't break on '=' after keyword as this violates PEP 8.
assert token_type != token.INDENT
first = source[:end_offset]
second_indent = indentation
if (first.rstrip().endswith('(') and
source[end_offset:].lstrip().startswith(')')):
pass
elif first.rstrip().endswith('('):
second_indent += indent_word
elif '(' in first:
second_indent += ' ' * (1 + first.find('('))
else:
second_indent += indent_word
second = (second_indent + source[end_offset:].lstrip())
if (
not second.strip() or
second.lstrip().startswith('#')
):
continue
# Do not begin a line with a comma
if second.lstrip().startswith(','):
continue
# Do end a line with a dot
if first.rstrip().endswith('.'):
continue
if token_string in '+-*/':
fixed = first + ' \\' + '\n' + second
else:
fixed = first + '\n' + second
# Only fix if syntax is okay.
if check_syntax(normalize_multiline(fixed)
if aggressive else fixed):
yield indentation + fixed
def _is_binary_operator(token_type, text):
return ((token_type == tokenize.OP or text in ['and', 'or']) and
text not in '()[]{},:.;@=%~')
# A convenient way to handle tokens.
Token = collections.namedtuple('Token', ['token_type', 'token_string',
'spos', 'epos', 'line'])
class ReformattedLines(object):
"""The reflowed lines of atoms.
Each part of the line is represented as an "atom." They can be moved
around when need be to get the optimal formatting.
"""
###########################################################################
# Private Classes
class _Indent(object):
"""Represent an indentation in the atom stream."""
def __init__(self, indent_amt):
self._indent_amt = indent_amt
def emit(self):
return ' ' * self._indent_amt
@property
def size(self):
return self._indent_amt
class _Space(object):
"""Represent a space in the atom stream."""
def emit(self):
return ' '
@property
def size(self):
return 1
class _LineBreak(object):
"""Represent a line break in the atom stream."""
def emit(self):
return '\n'
@property
def size(self):
return 0
def __init__(self, max_line_length):
self._max_line_length = max_line_length
self._lines = []
self._bracket_depth = 0
self._prev_item = None
self._prev_prev_item = None
def __repr__(self):
return self.emit()
###########################################################################
# Public Methods
def add(self, obj, indent_amt, break_after_open_bracket):
if isinstance(obj, Atom):
self._add_item(obj, indent_amt)
return
self._add_container(obj, indent_amt, break_after_open_bracket)
def add_comment(self, item):
num_spaces = 2
if len(self._lines) > 1:
if isinstance(self._lines[-1], self._Space):
num_spaces -= 1
if len(self._lines) > 2:
if isinstance(self._lines[-2], self._Space):
num_spaces -= 1
while num_spaces > 0:
self._lines.append(self._Space())
num_spaces -= 1
self._lines.append(item)
def add_indent(self, indent_amt):
self._lines.append(self._Indent(indent_amt))
def add_line_break(self, indent):
self._lines.append(self._LineBreak())
self.add_indent(len(indent))
def add_line_break_at(self, index, indent_amt):
self._lines.insert(index, self._LineBreak())
self._lines.insert(index + 1, self._Indent(indent_amt))
def add_space_if_needed(self, curr_text, equal=False):
if (
not self._lines or isinstance(
self._lines[-1], (self._LineBreak, self._Indent, self._Space))
):
return
prev_text = str(self._prev_item)
prev_prev_text = (
str(self._prev_prev_item) if self._prev_prev_item else '')
if (
# The previous item was a keyword or identifier and the current
# item isn't an operator that doesn't require a space.
((self._prev_item.is_keyword or self._prev_item.is_string or
self._prev_item.is_name or self._prev_item.is_number) and
(curr_text[0] not in '([{.,:}])' or
(curr_text[0] == '=' and equal))) or
# Don't place spaces around a '.', unless it's in an 'import'
# statement.
((prev_prev_text != 'from' and prev_text[-1] != '.' and
curr_text != 'import') and
# Don't place a space before a colon.
curr_text[0] != ':' and
# Don't split up ending brackets by spaces.
((prev_text[-1] in '}])' and curr_text[0] not in '.,}])') or
# Put a space after a colon or comma.
prev_text[-1] in ':,' or
# Put space around '=' if asked to.
(equal and prev_text == '=') or
# Put spaces around non-unary arithmetic operators.
((self._prev_prev_item and
(prev_text not in '+-' and
(self._prev_prev_item.is_name or
self._prev_prev_item.is_number or
self._prev_prev_item.is_string)) and
prev_text in ('+', '-', '%', '*', '/', '//', '**', 'in')))))
):
self._lines.append(self._Space())
def previous_item(self):
"""Return the previous non-whitespace item."""
return self._prev_item
def fits_on_current_line(self, item_extent):
return self.current_size() + item_extent <= self._max_line_length
def current_size(self):
"""The size of the current line minus the indentation."""
size = 0
for item in reversed(self._lines):
size += item.size
if isinstance(item, self._LineBreak):
break
return size
def line_empty(self):
return (self._lines and
isinstance(self._lines[-1],
(self._LineBreak, self._Indent)))
def emit(self):
string = ''
for item in self._lines:
if isinstance(item, self._LineBreak):
string = string.rstrip()
string += item.emit()
return string.rstrip() + '\n'
###########################################################################
# Private Methods
def _add_item(self, item, indent_amt):
"""Add an item to the line.
Reflow the line to get the best formatting after the item is
inserted. The bracket depth indicates if the item is being
inserted inside of a container or not.
"""
if self._prev_item and self._prev_item.is_string and item.is_string:
# Place consecutive string literals on separate lines.
self._lines.append(self._LineBreak())
self._lines.append(self._Indent(indent_amt))
item_text = str(item)
if self._lines and self._bracket_depth:
# Adding the item into a container.
self._prevent_default_initializer_splitting(item, indent_amt)
if item_text in '.,)]}':
self._split_after_delimiter(item, indent_amt)
elif self._lines and not self.line_empty():
# Adding the item outside of a container.
if self.fits_on_current_line(len(item_text)):
self._enforce_space(item)
else:
# Line break for the new item.
self._lines.append(self._LineBreak())
self._lines.append(self._Indent(indent_amt))
self._lines.append(item)
self._prev_item, self._prev_prev_item = item, self._prev_item
if item_text in '([{':
self._bracket_depth += 1
elif item_text in '}])':
self._bracket_depth -= 1
assert self._bracket_depth >= 0
def _add_container(self, container, indent_amt, break_after_open_bracket):
actual_indent = indent_amt + 1
if (
str(self._prev_item) != '=' and
not self.line_empty() and
not self.fits_on_current_line(
container.size + self._bracket_depth + 2)
):
if str(container)[0] == '(' and self._prev_item.is_name:
# Don't split before the opening bracket of a call.
break_after_open_bracket = True
actual_indent = indent_amt + 4
elif (
break_after_open_bracket or
str(self._prev_item) not in '([{'
):
# If the container doesn't fit on the current line and the
# current line isn't empty, place the container on the next
# line.
self._lines.append(self._LineBreak())
self._lines.append(self._Indent(indent_amt))
break_after_open_bracket = False
else:
actual_indent = self.current_size() + 1
break_after_open_bracket = False
if isinstance(container, (ListComprehension, IfExpression)):
actual_indent = indent_amt
# Increase the continued indentation only if recursing on a
# container.
container.reflow(self, ' ' * actual_indent,
break_after_open_bracket=break_after_open_bracket)
def _prevent_default_initializer_splitting(self, item, indent_amt):
"""Prevent splitting between a default initializer.
When there is a default initializer, it's best to keep it all on
the same line. It's nicer and more readable, even if it goes
over the maximum allowable line length. This goes back along the
current line to determine if we have a default initializer, and,
if so, to remove extraneous whitespaces and add a line
break/indent before it if needed.
"""
if str(item) == '=':
# This is the assignment in the initializer. Just remove spaces for
# now.
self._delete_whitespace()
return
if (not self._prev_item or not self._prev_prev_item or
str(self._prev_item) != '='):
return
self._delete_whitespace()
prev_prev_index = self._lines.index(self._prev_prev_item)
if (
isinstance(self._lines[prev_prev_index - 1], self._Indent) or
self.fits_on_current_line(item.size + 1)
):
# The default initializer is already the only item on this line.
# Don't insert a newline here.
return
# Replace the space with a newline/indent combo.
if isinstance(self._lines[prev_prev_index - 1], self._Space):
del self._lines[prev_prev_index - 1]
self.add_line_break_at(self._lines.index(self._prev_prev_item),
indent_amt)
def _split_after_delimiter(self, item, indent_amt):
"""Split the line only after a delimiter."""
self._delete_whitespace()
if self.fits_on_current_line(item.size):
return
last_space = None
for current_item in reversed(self._lines):
if (
last_space and
(not isinstance(current_item, Atom) or
not current_item.is_colon)
):
break
else:
last_space = None
if isinstance(current_item, self._Space):
last_space = current_item
if isinstance(current_item, (self._LineBreak, self._Indent)):
return
if not last_space:
return
self.add_line_break_at(self._lines.index(last_space), indent_amt)
def _enforce_space(self, item):
"""Enforce a space in certain situations.
There are cases where we will want a space where normally we
wouldn't put one. This just enforces the addition of a space.
"""
if isinstance(self._lines[-1],
(self._Space, self._LineBreak, self._Indent)):
return
if not self._prev_item:
return
item_text = str(item)
prev_text = str(self._prev_item)
# Prefer a space around a '.' in an import statement, and between the
# 'import' and '('.
if (
(item_text == '.' and prev_text == 'from') or
(item_text == 'import' and prev_text == '.') or
(item_text == '(' and prev_text == 'import')
):
self._lines.append(self._Space())
def _delete_whitespace(self):
"""Delete all whitespace from the end of the line."""
while isinstance(self._lines[-1], (self._Space, self._LineBreak,
self._Indent)):
del self._lines[-1]
class Atom(object):
"""The smallest unbreakable unit that can be reflowed."""
def __init__(self, atom):
self._atom = atom
def __repr__(self):
return self._atom.token_string
def __len__(self):
return self.size
def reflow(
self, reflowed_lines, continued_indent, extent,
break_after_open_bracket=False,
is_list_comp_or_if_expr=False,
next_is_dot=False
):
if self._atom.token_type == tokenize.COMMENT:
reflowed_lines.add_comment(self)
return
total_size = extent if extent else self.size
if self._atom.token_string not in ',:([{}])':
# Some atoms will need an extra 1-sized space token after them.
total_size += 1
prev_item = reflowed_lines.previous_item()
if (
not is_list_comp_or_if_expr and
not reflowed_lines.fits_on_current_line(total_size) and
not (next_is_dot and
reflowed_lines.fits_on_current_line(self.size + 1)) and
not reflowed_lines.line_empty() and
not self.is_colon and
not (prev_item and prev_item.is_name and
str(self) == '(')
):
# Start a new line if there is already something on the line and
# adding this atom would make it go over the max line length.
reflowed_lines.add_line_break(continued_indent)
else:
reflowed_lines.add_space_if_needed(str(self))
reflowed_lines.add(self, len(continued_indent),
break_after_open_bracket)
def emit(self):
return self.__repr__()
@property
def is_keyword(self):
return keyword.iskeyword(self._atom.token_string)
@property
def is_string(self):
return self._atom.token_type == tokenize.STRING
@property
def is_name(self):
return self._atom.token_type == tokenize.NAME
@property
def is_number(self):
return self._atom.token_type == tokenize.NUMBER
@property
def is_comma(self):
return self._atom.token_string == ','
@property
def is_colon(self):
return self._atom.token_string == ':'
@property
def size(self):
return len(self._atom.token_string)
class Container(object):
"""Base class for all container types."""
def __init__(self, items):
self._items = items
def __repr__(self):
string = ''
last_was_keyword = False
for item in self._items:
if item.is_comma:
string += ', '
elif item.is_colon:
string += ': '
else:
item_string = str(item)
if (
string and
(last_was_keyword or
(not string.endswith(tuple('([{,.:}]) ')) and
not item_string.startswith(tuple('([{,.:}])'))))
):
string += ' '
string += item_string
last_was_keyword = item.is_keyword
return string
def __iter__(self):
for element in self._items:
yield element
def __getitem__(self, idx):
return self._items[idx]
def reflow(self, reflowed_lines, continued_indent,
break_after_open_bracket=False):
last_was_container = False
for (index, item) in enumerate(self._items):
next_item = get_item(self._items, index + 1)
if isinstance(item, Atom):
is_list_comp_or_if_expr = (
isinstance(self, (ListComprehension, IfExpression)))
item.reflow(reflowed_lines, continued_indent,
self._get_extent(index),
is_list_comp_or_if_expr=is_list_comp_or_if_expr,
next_is_dot=(next_item and
str(next_item) == '.'))
if last_was_container and item.is_comma:
reflowed_lines.add_line_break(continued_indent)
last_was_container = False
else: # isinstance(item, Container)
reflowed_lines.add(item, len(continued_indent),
break_after_open_bracket)
last_was_container = not isinstance(item, (ListComprehension,
IfExpression))
if (
break_after_open_bracket and index == 0 and
# Prefer to keep empty containers together instead of
# separating them.
str(item) == self.open_bracket and
(not next_item or str(next_item) != self.close_bracket) and
(len(self._items) != 3 or not isinstance(next_item, Atom))
):
reflowed_lines.add_line_break(continued_indent)
break_after_open_bracket = False
else:
next_next_item = get_item(self._items, index + 2)
if (
str(item) not in ['.', '%', 'in'] and
next_item and not isinstance(next_item, Container) and
str(next_item) != ':' and
next_next_item and (not isinstance(next_next_item, Atom) or
str(next_item) == 'not') and
not reflowed_lines.line_empty() and
not reflowed_lines.fits_on_current_line(
self._get_extent(index + 1) + 2)
):
reflowed_lines.add_line_break(continued_indent)
def _get_extent(self, index):
"""The extent of the full element.
E.g., the length of a function call or keyword.
"""
extent = 0
prev_item = get_item(self._items, index - 1)
seen_dot = prev_item and str(prev_item) == '.'
while index < len(self._items):
item = get_item(self._items, index)
index += 1
if isinstance(item, (ListComprehension, IfExpression)):
break
if isinstance(item, Container):
if prev_item and prev_item.is_name:
if seen_dot:
extent += 1
else:
extent += item.size
prev_item = item
continue
elif (str(item) not in ['.', '=', ':', 'not'] and
not item.is_name and not item.is_string):
break
if str(item) == '.':
seen_dot = True
extent += item.size
prev_item = item
return extent
@property
def is_string(self):
return False
@property
def size(self):
return len(self.__repr__())
@property
def is_keyword(self):
return False
@property
def is_name(self):
return False
@property
def is_comma(self):
return False
@property
def is_colon(self):
return False
@property
def open_bracket(self):
return None
@property
def close_bracket(self):
return None
class Tuple(Container):
"""A high-level representation of a tuple."""
@property
def open_bracket(self):
return '('
@property
def close_bracket(self):
return ')'
class List(Container):
"""A high-level representation of a list."""
@property
def open_bracket(self):
return '['
@property
def close_bracket(self):
return ']'
class DictOrSet(Container):
"""A high-level representation of a dictionary or set."""
@property
def open_bracket(self):
return '{'
@property
def close_bracket(self):
return '}'
class ListComprehension(Container):
"""A high-level representation of a list comprehension."""
@property
def size(self):
length = 0
for item in self._items:
if isinstance(item, IfExpression):
break
length += item.size
return length
class IfExpression(Container):
"""A high-level representation of an if-expression."""
def _parse_container(tokens, index, for_or_if=None):
"""Parse a high-level container, such as a list, tuple, etc."""
# Store the opening bracket.
items = [Atom(Token(*tokens[index]))]
index += 1
num_tokens = len(tokens)
while index < num_tokens:
tok = Token(*tokens[index])
if tok.token_string in ',)]}':
# First check if we're at the end of a list comprehension or
# if-expression. Don't add the ending token as part of the list
# comprehension or if-expression, because they aren't part of those
# constructs.
if for_or_if == 'for':
return (ListComprehension(items), index - 1)
elif for_or_if == 'if':
return (IfExpression(items), index - 1)
# We've reached the end of a container.
items.append(Atom(tok))
# If not, then we are at the end of a container.
if tok.token_string == ')':
# The end of a tuple.
return (Tuple(items), index)
elif tok.token_string == ']':
# The end of a list.
return (List(items), index)
elif tok.token_string == '}':
# The end of a dictionary or set.
return (DictOrSet(items), index)
elif tok.token_string in '([{':
# A sub-container is being defined.
(container, index) = _parse_container(tokens, index)
items.append(container)
elif tok.token_string == 'for':
(container, index) = _parse_container(tokens, index, 'for')
items.append(container)
elif tok.token_string == 'if':
(container, index) = _parse_container(tokens, index, 'if')
items.append(container)
else:
items.append(Atom(tok))
index += 1
return (None, None)
def _parse_tokens(tokens):
"""Parse the tokens.
This converts the tokens into a form where we can manipulate them
more easily.
"""
index = 0
parsed_tokens = []
num_tokens = len(tokens)
while index < num_tokens:
tok = Token(*tokens[index])
assert tok.token_type != token.INDENT
if tok.token_type == tokenize.NEWLINE:
# There's only one newline and it's at the end.
break
if tok.token_string in '([{':
(container, index) = _parse_container(tokens, index)
if not container:
return None
parsed_tokens.append(container)
else:
parsed_tokens.append(Atom(tok))
index += 1
return parsed_tokens
def _reflow_lines(parsed_tokens, indentation, max_line_length,
start_on_prefix_line):
"""Reflow the lines so that it looks nice."""
if str(parsed_tokens[0]) == 'def':
# A function definition gets indented a bit more.
continued_indent = indentation + ' ' * 2 * DEFAULT_INDENT_SIZE
else:
continued_indent = indentation + ' ' * DEFAULT_INDENT_SIZE
break_after_open_bracket = not start_on_prefix_line
lines = ReformattedLines(max_line_length)
lines.add_indent(len(indentation.lstrip('\r\n')))
if not start_on_prefix_line:
# If splitting after the opening bracket will cause the first element
# to be aligned weirdly, don't try it.
first_token = get_item(parsed_tokens, 0)
second_token = get_item(parsed_tokens, 1)
if (
first_token and second_token and
str(second_token)[0] == '(' and
len(indentation) + len(first_token) + 1 == len(continued_indent)
):
return None
for item in parsed_tokens:
lines.add_space_if_needed(str(item), equal=True)
save_continued_indent = continued_indent
if start_on_prefix_line and isinstance(item, Container):
start_on_prefix_line = False
continued_indent = ' ' * (lines.current_size() + 1)
item.reflow(lines, continued_indent, break_after_open_bracket)
continued_indent = save_continued_indent
return lines.emit()
def _shorten_line_at_tokens_new(tokens, source, indentation,
max_line_length):
"""Shorten the line taking its length into account.
The input is expected to be free of newlines except for inside
multiline strings and at the end.
"""
# Yield the original source so to see if it's a better choice than the
# shortened candidate lines we generate here.
yield indentation + source
parsed_tokens = _parse_tokens(tokens)
if parsed_tokens:
# Perform two reflows. The first one starts on the same line as the
# prefix. The second starts on the line after the prefix.
fixed = _reflow_lines(parsed_tokens, indentation, max_line_length,
start_on_prefix_line=True)
if fixed and check_syntax(normalize_multiline(fixed.lstrip())):
yield fixed
fixed = _reflow_lines(parsed_tokens, indentation, max_line_length,
start_on_prefix_line=False)
if fixed and check_syntax(normalize_multiline(fixed.lstrip())):
yield fixed
def _shorten_line_at_tokens(tokens, source, indentation, indent_word,
key_token_strings, aggressive):
"""Separate line by breaking at tokens in key_token_strings.
The input is expected to be free of newlines except for inside
multiline strings and at the end.
"""
offsets = []
for (index, _t) in enumerate(token_offsets(tokens)):
(token_type,
token_string,
start_offset,
end_offset) = _t
assert token_type != token.INDENT
if token_string in key_token_strings:
# Do not break in containers with zero or one items.
unwanted_next_token = {
'(': ')',
'[': ']',
'{': '}'}.get(token_string)
if unwanted_next_token:
if (
get_item(tokens,
index + 1,
default=[None, None])[1] == unwanted_next_token or
get_item(tokens,
index + 2,
default=[None, None])[1] == unwanted_next_token
):
continue
if (
index > 2 and token_string == '(' and
tokens[index - 1][1] in ',(%['
):
# Don't split after a tuple start, or before a tuple start if
# the tuple is in a list.
continue
if end_offset < len(source) - 1:
# Don't split right before newline.
offsets.append(end_offset)
else:
# Break at adjacent strings. These were probably meant to be on
# separate lines in the first place.
previous_token = get_item(tokens, index - 1)
if (
token_type == tokenize.STRING and
previous_token and previous_token[0] == tokenize.STRING
):
offsets.append(start_offset)
current_indent = None
fixed = None
for line in split_at_offsets(source, offsets):
if fixed:
fixed += '\n' + current_indent + line
for symbol in '([{':
if line.endswith(symbol):
current_indent += indent_word
else:
# First line.
fixed = line
assert not current_indent
current_indent = indent_word
assert fixed is not None
if check_syntax(normalize_multiline(fixed)
if aggressive > 1 else fixed):
return indentation + fixed
return None
def token_offsets(tokens):
"""Yield tokens and offsets."""
end_offset = 0
previous_end_row = 0
previous_end_column = 0
for t in tokens:
token_type = t[0]
token_string = t[1]
(start_row, start_column) = t[2]
(end_row, end_column) = t[3]
# Account for the whitespace between tokens.
end_offset += start_column
if previous_end_row == start_row:
end_offset -= previous_end_column
# Record the start offset of the token.
start_offset = end_offset
# Account for the length of the token itself.
end_offset += len(token_string)
yield (token_type,
token_string,
start_offset,
end_offset)
previous_end_row = end_row
previous_end_column = end_column
def normalize_multiline(line):
"""Normalize multiline-related code that will cause syntax error.
This is for purposes of checking syntax.
"""
if line.startswith('def ') and line.rstrip().endswith(':'):
return line + ' pass'
elif line.startswith('return '):
return 'def _(): ' + line
elif line.startswith('@'):
return line + 'def _(): pass'
elif line.startswith('class '):
return line + ' pass'
elif line.startswith(('if ', 'elif ', 'for ', 'while ')):
return line + ' pass'
return line
def fix_whitespace(line, offset, replacement):
"""Replace whitespace at offset and return fixed line."""
# Replace escaped newlines too
left = line[:offset].rstrip('\n\r \t\\')
right = line[offset:].lstrip('\n\r \t\\')
if right.startswith('#'):
return line
return left + replacement + right
def _execute_pep8(pep8_options, source):
"""Execute pycodestyle via python method calls."""
class QuietReport(pycodestyle.BaseReport):
"""Version of checker that does not print."""
def __init__(self, options):
super(QuietReport, self).__init__(options)
self.__full_error_results = []
def error(self, line_number, offset, text, check):
"""Collect errors."""
code = super(QuietReport, self).error(line_number,
offset,
text,
check)
if code:
self.__full_error_results.append(
{'id': code,
'line': line_number,
'column': offset + 1,
'info': text})
def full_error_results(self):
"""Return error results in detail.
Results are in the form of a list of dictionaries. Each
dictionary contains 'id', 'line', 'column', and 'info'.
"""
return self.__full_error_results
checker = pycodestyle.Checker('', lines=source, reporter=QuietReport,
**pep8_options)
checker.check_all()
return checker.report.full_error_results()
def _remove_leading_and_normalize(line, with_rstrip=True):
# ignore FF in first lstrip()
if with_rstrip:
return line.lstrip(' \t\v').rstrip(CR + LF) + '\n'
return line.lstrip(' \t\v')
class Reindenter(object):
"""Reindents badly-indented code to uniformly use four-space indentation.
Released to the public domain, by Tim Peters, 03 October 2000.
"""
def __init__(self, input_text, leave_tabs=False):
sio = io.StringIO(input_text)
source_lines = sio.readlines()
self.string_content_line_numbers = multiline_string_lines(input_text)
# File lines, rstripped & tab-expanded. Dummy at start is so
# that we can use tokenize's 1-based line numbering easily.
# Note that a line is all-blank iff it is a newline.
self.lines = []
for line_number, line in enumerate(source_lines, start=1):
# Do not modify if inside a multiline string.
if line_number in self.string_content_line_numbers:
self.lines.append(line)
else:
# Only expand leading tabs.
with_rstrip = line_number != len(source_lines)
if leave_tabs:
self.lines.append(
_get_indentation(line) +
_remove_leading_and_normalize(line, with_rstrip)
)
else:
self.lines.append(
_get_indentation(line).expandtabs() +
_remove_leading_and_normalize(line, with_rstrip)
)
self.lines.insert(0, None)
self.index = 1 # index into self.lines of next line
self.input_text = input_text
def run(self, indent_size=DEFAULT_INDENT_SIZE):
"""Fix indentation and return modified line numbers.
Line numbers are indexed at 1.
"""
if indent_size < 1:
return self.input_text
try:
stats = _reindent_stats(tokenize.generate_tokens(self.getline))
except (SyntaxError, tokenize.TokenError):
return self.input_text
# Remove trailing empty lines.
lines = self.lines
# Sentinel.
stats.append((len(lines), 0))
# Map count of leading spaces to # we want.
have2want = {}
# Program after transformation.
after = []
# Copy over initial empty lines -- there's nothing to do until
# we see a line with *something* on it.
i = stats[0][0]
after.extend(lines[1:i])
for i in range(len(stats) - 1):
thisstmt, thislevel = stats[i]
nextstmt = stats[i + 1][0]
have = _leading_space_count(lines[thisstmt])
want = thislevel * indent_size
if want < 0:
# A comment line.
if have:
# An indented comment line. If we saw the same
# indentation before, reuse what it most recently
# mapped to.
want = have2want.get(have, -1)
if want < 0:
# Then it probably belongs to the next real stmt.
for j in range(i + 1, len(stats) - 1):
jline, jlevel = stats[j]
if jlevel >= 0:
if have == _leading_space_count(lines[jline]):
want = jlevel * indent_size
break
# Maybe it's a hanging comment like this one,
if want < 0:
# in which case we should shift it like its base
# line got shifted.
for j in range(i - 1, -1, -1):
jline, jlevel = stats[j]
if jlevel >= 0:
want = (have + _leading_space_count(
after[jline - 1]) -
_leading_space_count(lines[jline]))
break
if want < 0:
# Still no luck -- leave it alone.
want = have
else:
want = 0
assert want >= 0
have2want[have] = want
diff = want - have
if diff == 0 or have == 0:
after.extend(lines[thisstmt:nextstmt])
else:
for line_number, line in enumerate(lines[thisstmt:nextstmt],
start=thisstmt):
if line_number in self.string_content_line_numbers:
after.append(line)
elif diff > 0:
if line == '\n':
after.append(line)
else:
after.append(' ' * diff + line)
else:
remove = min(_leading_space_count(line), -diff)
after.append(line[remove:])
return ''.join(after)
def getline(self):
"""Line-getter for tokenize."""
if self.index >= len(self.lines):
line = ''
else:
line = self.lines[self.index]
self.index += 1
return line
def _reindent_stats(tokens):
"""Return list of (lineno, indentlevel) pairs.
One for each stmt and comment line. indentlevel is -1 for comment
lines, as a signal that tokenize doesn't know what to do about them;
indeed, they're our headache!
"""
find_stmt = 1 # Next token begins a fresh stmt?
level = 0 # Current indent level.
stats = []
for t in tokens:
token_type = t[0]
sline = t[2][0]
line = t[4]
if token_type == tokenize.NEWLINE:
# A program statement, or ENDMARKER, will eventually follow,
# after some (possibly empty) run of tokens of the form
# (NL | COMMENT)* (INDENT | DEDENT+)?
find_stmt = 1
elif token_type == tokenize.INDENT:
find_stmt = 1
level += 1
elif token_type == tokenize.DEDENT:
find_stmt = 1
level -= 1
elif token_type == tokenize.COMMENT:
if find_stmt:
stats.append((sline, -1))
# But we're still looking for a new stmt, so leave
# find_stmt alone.
elif token_type == tokenize.NL:
pass
elif find_stmt:
# This is the first "real token" following a NEWLINE, so it
# must be the first token of the next program statement, or an
# ENDMARKER.
find_stmt = 0
if line: # Not endmarker.
stats.append((sline, level))
return stats
def _leading_space_count(line):
"""Return number of leading spaces in line."""
i = 0
while i < len(line) and line[i] == ' ':
i += 1
return i
def refactor_with_2to3(source_text, fixer_names, filename=''):
"""Use lib2to3 to refactor the source.
Return the refactored source code.
"""
from lib2to3.refactor import RefactoringTool
fixers = ['lib2to3.fixes.fix_' + name for name in fixer_names]
tool = RefactoringTool(fixer_names=fixers, explicit=fixers)
from lib2to3.pgen2 import tokenize as lib2to3_tokenize
try:
# The name parameter is necessary particularly for the "import" fixer.
return str(tool.refactor_string(source_text, name=filename))
except lib2to3_tokenize.TokenError:
return source_text
def check_syntax(code):
"""Return True if syntax is okay."""
try:
return compile(code, '<string>', 'exec', dont_inherit=True)
except (SyntaxError, TypeError, ValueError):
return False
def find_with_line_numbers(pattern, contents):
"""A wrapper around 're.finditer' to find line numbers.
Returns a list of line numbers where pattern was found in contents.
"""
matches = list(re.finditer(pattern, contents))
if not matches:
return []
end = matches[-1].start()
# -1 so a failed `rfind` maps to the first line.
newline_offsets = {
-1: 0
}
for line_num, m in enumerate(re.finditer(r'\n', contents), 1):
offset = m.start()
if offset > end:
break
newline_offsets[offset] = line_num
def get_line_num(match, contents):
"""Get the line number of string in a files contents.
Failing to find the newline is OK, -1 maps to 0
"""
newline_offset = contents.rfind('\n', 0, match.start())
return newline_offsets[newline_offset]
return [get_line_num(match, contents) + 1 for match in matches]
def get_disabled_ranges(source):
"""Returns a list of tuples representing the disabled ranges.
If disabled and no re-enable will disable for rest of file.
"""
enable_line_nums = find_with_line_numbers(ENABLE_REGEX, source)
disable_line_nums = find_with_line_numbers(DISABLE_REGEX, source)
total_lines = len(re.findall("\n", source)) + 1
enable_commands = {}
for num in enable_line_nums:
enable_commands[num] = True
for num in disable_line_nums:
enable_commands[num] = False
disabled_ranges = []
currently_enabled = True
disabled_start = None
for line, commanded_enabled in sorted(enable_commands.items()):
if commanded_enabled is False and currently_enabled is True:
disabled_start = line
currently_enabled = False
elif commanded_enabled is True and currently_enabled is False:
disabled_ranges.append((disabled_start, line))
currently_enabled = True
if currently_enabled is False:
disabled_ranges.append((disabled_start, total_lines))
return disabled_ranges
def filter_disabled_results(result, disabled_ranges):
"""Filter out reports based on tuple of disabled ranges.
"""
line = result['line']
for disabled_range in disabled_ranges:
if disabled_range[0] <= line <= disabled_range[1]:
return False
return True
def filter_results(source, results, aggressive):
"""Filter out spurious reports from pycodestyle.
If aggressive is True, we allow possibly unsafe fixes (E711, E712).
"""
non_docstring_string_line_numbers = multiline_string_lines(
source, include_docstrings=False)
all_string_line_numbers = multiline_string_lines(
source, include_docstrings=True)
commented_out_code_line_numbers = commented_out_code_lines(source)
# Filter out the disabled ranges
disabled_ranges = get_disabled_ranges(source)
if disabled_ranges:
results = [
result for result in results if filter_disabled_results(
result,
disabled_ranges,
)
]
has_e901 = any(result['id'].lower() == 'e901' for result in results)
for r in results:
issue_id = r['id'].lower()
if r['line'] in non_docstring_string_line_numbers:
if issue_id.startswith(('e1', 'e501', 'w191')):
continue
if r['line'] in all_string_line_numbers:
if issue_id in ['e501']:
continue
# We must offset by 1 for lines that contain the trailing contents of
# multiline strings.
if not aggressive and (r['line'] + 1) in all_string_line_numbers:
# Do not modify multiline strings in non-aggressive mode. Remove
# trailing whitespace could break doctests.
if issue_id.startswith(('w29', 'w39')):
continue
if aggressive <= 0:
if issue_id.startswith(('e711', 'e72', 'w6')):
continue
if aggressive <= 1:
if issue_id.startswith(('e712', 'e713', 'e714')):
continue
if aggressive <= 2:
if issue_id.startswith(('e704')):
continue
if r['line'] in commented_out_code_line_numbers:
if issue_id.startswith(('e26', 'e501')):
continue
# Do not touch indentation if there is a token error caused by
# incomplete multi-line statement. Otherwise, we risk screwing up the
# indentation.
if has_e901:
if issue_id.startswith(('e1', 'e7')):
continue
yield r
def multiline_string_lines(source, include_docstrings=False):
"""Return line numbers that are within multiline strings.
The line numbers are indexed at 1.
Docstrings are ignored.
"""
line_numbers = set()
previous_token_type = ''
try:
for t in generate_tokens(source):
token_type = t[0]
start_row = t[2][0]
end_row = t[3][0]
if token_type == tokenize.STRING and start_row != end_row:
if (
include_docstrings or
previous_token_type != tokenize.INDENT
):
# We increment by one since we want the contents of the
# string.
line_numbers |= set(range(1 + start_row, 1 + end_row))
previous_token_type = token_type
except (SyntaxError, tokenize.TokenError):
pass
return line_numbers
def commented_out_code_lines(source):
"""Return line numbers of comments that are likely code.
Commented-out code is bad practice, but modifying it just adds even
more clutter.
"""
line_numbers = []
try:
for t in generate_tokens(source):
token_type = t[0]
token_string = t[1]
start_row = t[2][0]
line = t[4]
# Ignore inline comments.
if not line.lstrip().startswith('#'):
continue
if token_type == tokenize.COMMENT:
stripped_line = token_string.lstrip('#').strip()
with warnings.catch_warnings():
# ignore SyntaxWarning in Python3.8+
# refs:
# https://bugs.python.org/issue15248
# https://docs.python.org/3.8/whatsnew/3.8.html#other-language-changes
warnings.filterwarnings("ignore", category=SyntaxWarning)
if (
' ' in stripped_line and
'#' not in stripped_line and
check_syntax(stripped_line)
):
line_numbers.append(start_row)
except (SyntaxError, tokenize.TokenError):
pass
return line_numbers
def shorten_comment(line, max_line_length, last_comment=False):
"""Return trimmed or split long comment line.
If there are no comments immediately following it, do a text wrap.
Doing this wrapping on all comments in general would lead to jagged
comment text.
"""
assert len(line) > max_line_length
line = line.rstrip()
# PEP 8 recommends 72 characters for comment text.
indentation = _get_indentation(line) + '# '
max_line_length = min(max_line_length,
len(indentation) + 72)
MIN_CHARACTER_REPEAT = 5
if (
len(line) - len(line.rstrip(line[-1])) >= MIN_CHARACTER_REPEAT and
not line[-1].isalnum()
):
# Trim comments that end with things like ---------
return line[:max_line_length] + '\n'
elif last_comment and re.match(r'\s*#+\s*\w+', line):
split_lines = textwrap.wrap(line.lstrip(' \t#'),
initial_indent=indentation,
subsequent_indent=indentation,
width=max_line_length,
break_long_words=False,
break_on_hyphens=False)
return '\n'.join(split_lines) + '\n'
return line + '\n'
def normalize_line_endings(lines, newline):
"""Return fixed line endings.
All lines will be modified to use the most common line ending.
"""
line = [line.rstrip('\n\r') + newline for line in lines]
if line and lines[-1] == lines[-1].rstrip('\n\r'):
line[-1] = line[-1].rstrip('\n\r')
return line
def mutual_startswith(a, b):
return b.startswith(a) or a.startswith(b)
def code_match(code, select, ignore):
if ignore:
assert not isinstance(ignore, str)
for ignored_code in [c.strip() for c in ignore]:
if mutual_startswith(code.lower(), ignored_code.lower()):
return False
if select:
assert not isinstance(select, str)
for selected_code in [c.strip() for c in select]:
if mutual_startswith(code.lower(), selected_code.lower()):
return True
return False
return True
def fix_code(source, options=None, encoding=None, apply_config=False):
"""Return fixed source code.
"encoding" will be used to decode "source" if it is a byte string.
"""
options = _get_options(options, apply_config)
if not isinstance(source, str):
source = source.decode(encoding or get_encoding())
sio = io.StringIO(source)
return fix_lines(sio.readlines(), options=options)
def _get_options(raw_options, apply_config):
"""Return parsed options."""
if not raw_options:
return parse_args([''], apply_config=apply_config)
if isinstance(raw_options, dict):
options = parse_args([''], apply_config=apply_config)
for name, value in raw_options.items():
if not hasattr(options, name):
raise ValueError("No such option '{}'".format(name))
# Check for very basic type errors.
expected_type = type(getattr(options, name))
if not isinstance(expected_type, (str, )):
if isinstance(value, (str, )):
raise ValueError(
"Option '{}' should not be a string".format(name))
setattr(options, name, value)
else:
options = raw_options
return options
def fix_lines(source_lines, options, filename=''):
"""Return fixed source code."""
# Transform everything to line feed. Then change them back to original
# before returning fixed source code.
original_newline = find_newline(source_lines)
tmp_source = ''.join(normalize_line_endings(source_lines, '\n'))
# Keep a history to break out of cycles.
previous_hashes = set()
if options.line_range:
# Disable "apply_local_fixes()" for now due to issue #175.
fixed_source = tmp_source
else:
pep8_options = {
'ignore': options.ignore,
'select': options.select,
'max_line_length': options.max_line_length,
'hang_closing': options.hang_closing,
}
sio = io.StringIO(tmp_source)
contents = sio.readlines()
results = _execute_pep8(pep8_options, contents)
codes = {result['id'] for result in results
if result['id'] in SELECTED_GLOBAL_FIXED_METHOD_CODES}
# Apply global fixes only once (for efficiency).
fixed_source = apply_global_fixes(tmp_source,
options,
filename=filename,
codes=codes)
passes = 0
long_line_ignore_cache = set()
while hash(fixed_source) not in previous_hashes:
if options.pep8_passes >= 0 and passes > options.pep8_passes:
break
passes += 1
previous_hashes.add(hash(fixed_source))
tmp_source = copy.copy(fixed_source)
fix = FixPEP8(
filename,
options,
contents=tmp_source,
long_line_ignore_cache=long_line_ignore_cache)
fixed_source = fix.fix()
sio = io.StringIO(fixed_source)
return ''.join(normalize_line_endings(sio.readlines(), original_newline))
def fix_file(filename, options=None, output=None, apply_config=False):
if not options:
options = parse_args([filename], apply_config=apply_config)
original_source = readlines_from_file(filename)
fixed_source = original_source
if options.in_place or options.diff or output:
encoding = detect_encoding(filename)
if output:
output = LineEndingWrapper(wrap_output(output, encoding=encoding))
fixed_source = fix_lines(fixed_source, options, filename=filename)
if options.diff:
new = io.StringIO(fixed_source)
new = new.readlines()
diff = get_diff_text(original_source, new, filename)
if output:
output.write(diff)
output.flush()
elif options.jobs > 1:
diff = diff.encode(encoding)
return diff
elif options.in_place:
original = "".join(original_source).splitlines()
fixed = fixed_source.splitlines()
original_source_last_line = (
original_source[-1].split("\n")[-1] if original_source else ""
)
fixed_source_last_line = fixed_source.split("\n")[-1]
if original != fixed or (
original_source_last_line != fixed_source_last_line
):
with open_with_encoding(filename, 'w', encoding=encoding) as fp:
fp.write(fixed_source)
return fixed_source
return None
else:
if output:
output.write(fixed_source)
output.flush()
return fixed_source
def global_fixes():
"""Yield multiple (code, function) tuples."""
for function in list(globals().values()):
if inspect.isfunction(function):
arguments = _get_parameters(function)
if arguments[:1] != ['source']:
continue
code = extract_code_from_function(function)
if code:
yield (code, function)
def _get_parameters(function):
# pylint: disable=deprecated-method
if sys.version_info.major >= 3:
# We need to match "getargspec()", which includes "self" as the first
# value for methods.
# https://bugs.python.org/issue17481#msg209469
if inspect.ismethod(function):
function = function.__func__
return list(inspect.signature(function).parameters)
else:
return inspect.getargspec(function)[0]
def apply_global_fixes(source, options, where='global', filename='',
codes=None):
"""Run global fixes on source code.
These are fixes that only need be done once (unlike those in
FixPEP8, which are dependent on pycodestyle).
"""
if codes is None:
codes = []
if any(code_match(code, select=options.select, ignore=options.ignore)
for code in ['E101', 'E111']):
source = reindent(source,
indent_size=options.indent_size,
leave_tabs=not (
code_match(
'W191',
select=options.select,
ignore=options.ignore)
)
)
for (code, function) in global_fixes():
if code.upper() in SELECTED_GLOBAL_FIXED_METHOD_CODES \
and code.upper() not in codes:
continue
if code_match(code, select=options.select, ignore=options.ignore):
if options.verbose:
print('---> Applying {} fix for {}'.format(where,
code.upper()),
file=sys.stderr)
source = function(source,
aggressive=options.aggressive)
source = fix_2to3(source,
aggressive=options.aggressive,
select=options.select,
ignore=options.ignore,
filename=filename,
where=where,
verbose=options.verbose)
return source
def extract_code_from_function(function):
"""Return code handled by function."""
if not function.__name__.startswith('fix_'):
return None
code = re.sub('^fix_', '', function.__name__)
if not code:
return None
try:
int(code[1:])
except ValueError:
return None
return code
def _get_package_version():
packages = ["pycodestyle: {}".format(pycodestyle.__version__)]
return ", ".join(packages)
def create_parser():
"""Return command-line parser."""
parser = argparse.ArgumentParser(description=docstring_summary(__doc__),
prog='autopep8')
parser.add_argument('--version', action='version',
version='%(prog)s {} ({})'.format(
__version__, _get_package_version()))
parser.add_argument('-v', '--verbose', action='count',
default=0,
help='print verbose messages; '
'multiple -v result in more verbose messages')
parser.add_argument('-d', '--diff', action='store_true',
help='print the diff for the fixed source')
parser.add_argument('-i', '--in-place', action='store_true',
help='make changes to files in place')
parser.add_argument('--global-config', metavar='filename',
default=DEFAULT_CONFIG,
help='path to a global pep8 config file; if this file '
'does not exist then this is ignored '
'(default: {})'.format(DEFAULT_CONFIG))
parser.add_argument('--ignore-local-config', action='store_true',
help="don't look for and apply local config files; "
'if not passed, defaults are updated with any '
"config files in the project's root directory")
parser.add_argument('-r', '--recursive', action='store_true',
help='run recursively over directories; '
'must be used with --in-place or --diff')
parser.add_argument('-j', '--jobs', type=int, metavar='n', default=1,
help='number of parallel jobs; '
'match CPU count if value is less than 1')
parser.add_argument('-p', '--pep8-passes', metavar='n',
default=-1, type=int,
help='maximum number of additional pep8 passes '
'(default: infinite)')
parser.add_argument('-a', '--aggressive', action='count', default=0,
help='enable non-whitespace changes; '
'multiple -a result in more aggressive changes')
parser.add_argument('--experimental', action='store_true',
help='enable experimental fixes')
parser.add_argument('--exclude', metavar='globs',
help='exclude file/directory names that match these '
'comma-separated globs')
parser.add_argument('--list-fixes', action='store_true',
help='list codes for fixes; '
'used by --ignore and --select')
parser.add_argument('--ignore', metavar='errors', default='',
help='do not fix these errors/warnings '
'(default: {})'.format(DEFAULT_IGNORE))
parser.add_argument('--select', metavar='errors', default='',
help='fix only these errors/warnings (e.g. E4,W)')
parser.add_argument('--max-line-length', metavar='n', default=79, type=int,
help='set maximum allowed line length '
'(default: %(default)s)')
parser.add_argument('--line-range', '--range', metavar='line',
default=None, type=int, nargs=2,
help='only fix errors found within this inclusive '
'range of line numbers (e.g. 1 99); '
'line numbers are indexed at 1')
parser.add_argument('--indent-size', default=DEFAULT_INDENT_SIZE,
type=int, help=argparse.SUPPRESS)
parser.add_argument('--hang-closing', action='store_true',
help='hang-closing option passed to pycodestyle')
parser.add_argument('--exit-code', action='store_true',
help='change to behavior of exit code.'
' default behavior of return value, 0 is no '
'differences, 1 is error exit. return 2 when'
' add this option. 2 is exists differences.')
parser.add_argument('files', nargs='*',
help="files to format or '-' for standard in")
return parser
def _expand_codes(codes, ignore_codes):
"""expand to individual E/W codes"""
ret = set()
is_conflict = False
if all(
any(
conflicting_code.startswith(code)
for code in codes
)
for conflicting_code in CONFLICTING_CODES
):
is_conflict = True
is_ignore_w503 = "W503" in ignore_codes
is_ignore_w504 = "W504" in ignore_codes
for code in codes:
if code == "W":
if is_ignore_w503 and is_ignore_w504:
ret.update({"W1", "W2", "W3", "W505", "W6"})
elif is_ignore_w503:
ret.update({"W1", "W2", "W3", "W504", "W505", "W6"})
else:
ret.update({"W1", "W2", "W3", "W503", "W505", "W6"})
elif code in ("W5", "W50"):
if is_ignore_w503 and is_ignore_w504:
ret.update({"W505"})
elif is_ignore_w503:
ret.update({"W504", "W505"})
else:
ret.update({"W503", "W505"})
elif not (code in ("W503", "W504") and is_conflict):
ret.add(code)
return ret
def parse_args(arguments, apply_config=False):
"""Parse command-line options."""
parser = create_parser()
args = parser.parse_args(arguments)
if not args.files and not args.list_fixes:
parser.exit(EXIT_CODE_ARGPARSE_ERROR, 'incorrect number of arguments')
args.files = [decode_filename(name) for name in args.files]
if apply_config:
parser = read_config(args, parser)
# prioritize settings when exist pyproject.toml's tool.autopep8 section
try:
parser_with_pyproject_toml = read_pyproject_toml(args, parser)
except Exception:
parser_with_pyproject_toml = None
if parser_with_pyproject_toml:
parser = parser_with_pyproject_toml
args = parser.parse_args(arguments)
args.files = [decode_filename(name) for name in args.files]
if '-' in args.files:
if len(args.files) > 1:
parser.exit(
EXIT_CODE_ARGPARSE_ERROR,
'cannot mix stdin and regular files',
)
if args.diff:
parser.exit(
EXIT_CODE_ARGPARSE_ERROR,
'--diff cannot be used with standard input',
)
if args.in_place:
parser.exit(
EXIT_CODE_ARGPARSE_ERROR,
'--in-place cannot be used with standard input',
)
if args.recursive:
parser.exit(
EXIT_CODE_ARGPARSE_ERROR,
'--recursive cannot be used with standard input',
)
if len(args.files) > 1 and not (args.in_place or args.diff):
parser.exit(
EXIT_CODE_ARGPARSE_ERROR,
'autopep8 only takes one filename as argument '
'unless the "--in-place" or "--diff" args are used',
)
if args.recursive and not (args.in_place or args.diff):
parser.exit(
EXIT_CODE_ARGPARSE_ERROR,
'--recursive must be used with --in-place or --diff',
)
if args.in_place and args.diff:
parser.exit(
EXIT_CODE_ARGPARSE_ERROR,
'--in-place and --diff are mutually exclusive',
)
if args.max_line_length <= 0:
parser.exit(
EXIT_CODE_ARGPARSE_ERROR,
'--max-line-length must be greater than 0',
)
if args.indent_size <= 0:
parser.exit(
EXIT_CODE_ARGPARSE_ERROR,
'--indent-size must be greater than 0',
)
if args.select:
args.select = _expand_codes(
_split_comma_separated(args.select),
(_split_comma_separated(args.ignore) if args.ignore else [])
)
if args.ignore:
args.ignore = _split_comma_separated(args.ignore)
if all(
not any(
conflicting_code.startswith(ignore_code)
for ignore_code in args.ignore
)
for conflicting_code in CONFLICTING_CODES
):
args.ignore.update(CONFLICTING_CODES)
elif not args.select:
if args.aggressive:
# Enable everything by default if aggressive.
args.select = {'E', 'W1', 'W2', 'W3', 'W6'}
else:
args.ignore = _split_comma_separated(DEFAULT_IGNORE)
if args.exclude:
args.exclude = _split_comma_separated(args.exclude)
else:
args.exclude = {}
if args.jobs < 1:
# Do not import multiprocessing globally in case it is not supported
# on the platform.
import multiprocessing
args.jobs = multiprocessing.cpu_count()
if args.jobs > 1 and not (args.in_place or args.diff):
parser.exit(
EXIT_CODE_ARGPARSE_ERROR,
'parallel jobs requires --in-place',
)
if args.line_range:
if args.line_range[0] <= 0:
parser.exit(
EXIT_CODE_ARGPARSE_ERROR,
'--range must be positive numbers',
)
if args.line_range[0] > args.line_range[1]:
parser.exit(
EXIT_CODE_ARGPARSE_ERROR,
'First value of --range should be less than or equal '
'to the second',
)
return args
def _get_normalize_options(config, section, option_list):
for (k, _) in config.items(section):
norm_opt = k.lstrip('-').replace('-', '_')
if not option_list.get(norm_opt):
continue
opt_type = option_list[norm_opt]
if opt_type is int:
value = config.getint(section, k)
elif opt_type is bool:
value = config.getboolean(section, k)
else:
value = config.get(section, k)
yield norm_opt, k, value
def read_config(args, parser):
"""Read both user configuration and local configuration."""
config = SafeConfigParser()
try:
if args.verbose and os.path.exists(args.global_config):
print("read config path: {}".format(args.global_config))
config.read(args.global_config)
if not args.ignore_local_config:
parent = tail = args.files and os.path.abspath(
os.path.commonprefix(args.files))
while tail:
if config.read([os.path.join(parent, fn)
for fn in PROJECT_CONFIG]):
if args.verbose:
for fn in PROJECT_CONFIG:
config_file = os.path.join(parent, fn)
if not os.path.exists(config_file):
continue
print(
"read config path: {}".format(
os.path.join(parent, fn)
)
)
break
(parent, tail) = os.path.split(parent)
defaults = {}
option_list = {o.dest: o.type or type(o.default)
for o in parser._actions}
for section in ['pep8', 'pycodestyle', 'flake8']:
if not config.has_section(section):
continue
for norm_opt, k, value in _get_normalize_options(config, section,
option_list):
if args.verbose:
print("enable config: section={}, key={}, value={}".format(
section, k, value))
defaults[norm_opt] = value
parser.set_defaults(**defaults)
except Error:
# Ignore for now.
pass
return parser
def read_pyproject_toml(args, parser):
"""Read pyproject.toml and load configuration."""
import toml
config = None
if os.path.exists(args.global_config):
with open(args.global_config) as fp:
config = toml.load(fp)
if not args.ignore_local_config:
parent = tail = args.files and os.path.abspath(
os.path.commonprefix(args.files))
while tail:
pyproject_toml = os.path.join(parent, "pyproject.toml")
if os.path.exists(pyproject_toml):
with open(pyproject_toml) as fp:
config = toml.load(fp)
break
(parent, tail) = os.path.split(parent)
if not config:
return None
if config.get("tool", {}).get("autopep8") is None:
return None
config = config.get("tool").get("autopep8")
defaults = {}
option_list = {o.dest: o.type or type(o.default)
for o in parser._actions}
TUPLED_OPTIONS = ("ignore", "select")
for (k, v) in config.items():
norm_opt = k.lstrip('-').replace('-', '_')
if not option_list.get(norm_opt):
continue
if type(v) in (list, tuple) and norm_opt in TUPLED_OPTIONS:
value = ",".join(v)
else:
value = v
if args.verbose:
print("enable pyproject.toml config: "
"key={}, value={}".format(k, value))
defaults[norm_opt] = value
if defaults:
# set value when exists key-value in defaults dict
parser.set_defaults(**defaults)
return parser
def _split_comma_separated(string):
"""Return a set of strings."""
return {text.strip() for text in string.split(',') if text.strip()}
def decode_filename(filename):
"""Return Unicode filename."""
if isinstance(filename, str):
return filename
return filename.decode(sys.getfilesystemencoding())
def supported_fixes():
"""Yield pep8 error codes that autopep8 fixes.
Each item we yield is a tuple of the code followed by its
description.
"""
yield ('E101', docstring_summary(reindent.__doc__))
instance = FixPEP8(filename=None, options=None, contents='')
for attribute in dir(instance):
code = re.match('fix_([ew][0-9][0-9][0-9])', attribute)
if code:
yield (
code.group(1).upper(),
re.sub(r'\s+', ' ',
docstring_summary(getattr(instance, attribute).__doc__))
)
for (code, function) in sorted(global_fixes()):
yield (code.upper() + (4 - len(code)) * ' ',
re.sub(r'\s+', ' ', docstring_summary(function.__doc__)))
for code in sorted(CODE_TO_2TO3):
yield (code.upper() + (4 - len(code)) * ' ',
re.sub(r'\s+', ' ', docstring_summary(fix_2to3.__doc__)))
def docstring_summary(docstring):
"""Return summary of docstring."""
return docstring.split('\n')[0] if docstring else ''
def line_shortening_rank(candidate, indent_word, max_line_length,
experimental=False):
"""Return rank of candidate.
This is for sorting candidates.
"""
if not candidate.strip():
return 0
rank = 0
lines = candidate.rstrip().split('\n')
offset = 0
if (
not lines[0].lstrip().startswith('#') and
lines[0].rstrip()[-1] not in '([{'
):
for (opening, closing) in ('()', '[]', '{}'):
# Don't penalize empty containers that aren't split up. Things like
# this "foo(\n )" aren't particularly good.
opening_loc = lines[0].find(opening)
closing_loc = lines[0].find(closing)
if opening_loc >= 0:
if closing_loc < 0 or closing_loc != opening_loc + 1:
offset = max(offset, 1 + opening_loc)
current_longest = max(offset + len(x.strip()) for x in lines)
rank += 4 * max(0, current_longest - max_line_length)
rank += len(lines)
# Too much variation in line length is ugly.
rank += 2 * standard_deviation(len(line) for line in lines)
bad_staring_symbol = {
'(': ')',
'[': ']',
'{': '}'}.get(lines[0][-1])
if len(lines) > 1:
if (
bad_staring_symbol and
lines[1].lstrip().startswith(bad_staring_symbol)
):
rank += 20
for lineno, current_line in enumerate(lines):
current_line = current_line.strip()
if current_line.startswith('#'):
continue
for bad_start in ['.', '%', '+', '-', '/']:
if current_line.startswith(bad_start):
rank += 100
# Do not tolerate operators on their own line.
if current_line == bad_start:
rank += 1000
if (
current_line.endswith(('.', '%', '+', '-', '/')) and
"': " in current_line
):
rank += 1000
if current_line.endswith(('(', '[', '{', '.')):
# Avoid lonely opening. They result in longer lines.
if len(current_line) <= len(indent_word):
rank += 100
# Avoid the ugliness of ", (\n".
if (
current_line.endswith('(') and
current_line[:-1].rstrip().endswith(',')
):
rank += 100
# Avoid the ugliness of "something[\n" and something[index][\n.
if (
current_line.endswith('[') and
len(current_line) > 1 and
(current_line[-2].isalnum() or current_line[-2] in ']')
):
rank += 300
# Also avoid the ugliness of "foo.\nbar"
if current_line.endswith('.'):
rank += 100
if has_arithmetic_operator(current_line):
rank += 100
# Avoid breaking at unary operators.
if re.match(r'.*[(\[{]\s*[\-\+~]$', current_line.rstrip('\\ ')):
rank += 1000
if re.match(r'.*lambda\s*\*$', current_line.rstrip('\\ ')):
rank += 1000
if current_line.endswith(('%', '(', '[', '{')):
rank -= 20
# Try to break list comprehensions at the "for".
if current_line.startswith('for '):
rank -= 50
if current_line.endswith('\\'):
# If a line ends in \-newline, it may be part of a
# multiline string. In that case, we would like to know
# how long that line is without the \-newline. If it's
# longer than the maximum, or has comments, then we assume
# that the \-newline is an okay candidate and only
# penalize it a bit.
total_len = len(current_line)
lineno += 1
while lineno < len(lines):
total_len += len(lines[lineno])
if lines[lineno].lstrip().startswith('#'):
total_len = max_line_length
break
if not lines[lineno].endswith('\\'):
break
lineno += 1
if total_len < max_line_length:
rank += 10
else:
rank += 100 if experimental else 1
# Prefer breaking at commas rather than colon.
if ',' in current_line and current_line.endswith(':'):
rank += 10
# Avoid splitting dictionaries between key and value.
if current_line.endswith(':'):
rank += 100
rank += 10 * count_unbalanced_brackets(current_line)
return max(0, rank)
def standard_deviation(numbers):
"""Return standard deviation."""
numbers = list(numbers)
if not numbers:
return 0
mean = sum(numbers) / len(numbers)
return (sum((n - mean) ** 2 for n in numbers) /
len(numbers)) ** .5
def has_arithmetic_operator(line):
"""Return True if line contains any arithmetic operators."""
for operator in pycodestyle.ARITHMETIC_OP:
if operator in line:
return True
return False
def count_unbalanced_brackets(line):
"""Return number of unmatched open/close brackets."""
count = 0
for opening, closing in ['()', '[]', '{}']:
count += abs(line.count(opening) - line.count(closing))
return count
def split_at_offsets(line, offsets):
"""Split line at offsets.
Return list of strings.
"""
result = []
previous_offset = 0
current_offset = 0
for current_offset in sorted(offsets):
if current_offset < len(line) and previous_offset != current_offset:
result.append(line[previous_offset:current_offset].strip())
previous_offset = current_offset
result.append(line[current_offset:])
return result
class LineEndingWrapper(object):
r"""Replace line endings to work with sys.stdout.
It seems that sys.stdout expects only '\n' as the line ending, no matter
the platform. Otherwise, we get repeated line endings.
"""
def __init__(self, output):
self.__output = output
def write(self, s):
self.__output.write(s.replace('\r\n', '\n').replace('\r', '\n'))
def flush(self):
self.__output.flush()
def match_file(filename, exclude):
"""Return True if file is okay for modifying/recursing."""
base_name = os.path.basename(filename)
if base_name.startswith('.'):
return False
for pattern in exclude:
if fnmatch.fnmatch(base_name, pattern):
return False
if fnmatch.fnmatch(filename, pattern):
return False
if not os.path.isdir(filename) and not is_python_file(filename):
return False
return True
def find_files(filenames, recursive, exclude):
"""Yield filenames."""
while filenames:
name = filenames.pop(0)
if recursive and os.path.isdir(name):
for root, directories, children in os.walk(name):
filenames += [os.path.join(root, f) for f in children
if match_file(os.path.join(root, f),
exclude)]
directories[:] = [d for d in directories
if match_file(os.path.join(root, d),
exclude)]
else:
is_exclude_match = False
for pattern in exclude:
if fnmatch.fnmatch(name, pattern):
is_exclude_match = True
break
if not is_exclude_match:
yield name
def _fix_file(parameters):
"""Helper function for optionally running fix_file() in parallel."""
if parameters[1].verbose:
print('[file:{}]'.format(parameters[0]), file=sys.stderr)
try:
return fix_file(*parameters)
except IOError as error:
print(str(error), file=sys.stderr)
raise error
def fix_multiple_files(filenames, options, output=None):
"""Fix list of files.
Optionally fix files recursively.
"""
results = []
filenames = find_files(filenames, options.recursive, options.exclude)
if options.jobs > 1:
import multiprocessing
pool = multiprocessing.Pool(options.jobs)
rets = []
for name in filenames:
ret = pool.apply_async(_fix_file, ((name, options),))
rets.append(ret)
pool.close()
pool.join()
if options.diff:
for r in rets:
sys.stdout.write(r.get().decode())
sys.stdout.flush()
results.extend([x.get() for x in rets if x is not None])
else:
for name in filenames:
ret = _fix_file((name, options, output))
if ret is None:
continue
if options.diff:
if ret != '':
results.append(ret)
elif options.in_place:
results.append(ret)
else:
original_source = readlines_from_file(name)
if "".join(original_source).splitlines() != ret.splitlines():
results.append(ret)
return results
def is_python_file(filename):
"""Return True if filename is Python file."""
if filename.endswith('.py'):
return True
try:
with open_with_encoding(
filename,
limit_byte_check=MAX_PYTHON_FILE_DETECTION_BYTES) as f:
text = f.read(MAX_PYTHON_FILE_DETECTION_BYTES)
if not text:
return False
first_line = text.splitlines()[0]
except (IOError, IndexError):
return False
if not PYTHON_SHEBANG_REGEX.match(first_line):
return False
return True
def is_probably_part_of_multiline(line):
"""Return True if line is likely part of a multiline string.
When multiline strings are involved, pep8 reports the error as being
at the start of the multiline string, which doesn't work for us.
"""
return (
'"""' in line or
"'''" in line or
line.rstrip().endswith('\\')
)
def wrap_output(output, encoding):
"""Return output with specified encoding."""
return codecs.getwriter(encoding)(output.buffer
if hasattr(output, 'buffer')
else output)
def get_encoding():
"""Return preferred encoding."""
return locale.getpreferredencoding() or sys.getdefaultencoding()
def main(argv=None, apply_config=True):
"""Command-line entry."""
if argv is None:
argv = sys.argv
try:
# Exit on broken pipe.
signal.signal(signal.SIGPIPE, signal.SIG_DFL)
except AttributeError: # pragma: no cover
# SIGPIPE is not available on Windows.
pass
try:
args = parse_args(argv[1:], apply_config=apply_config)
if args.list_fixes:
for code, description in sorted(supported_fixes()):
print('{code} - {description}'.format(
code=code, description=description))
return EXIT_CODE_OK
if args.files == ['-']:
assert not args.in_place
encoding = sys.stdin.encoding or get_encoding()
read_stdin = sys.stdin.read()
fixed_stdin = fix_code(read_stdin, args, encoding=encoding)
# LineEndingWrapper is unnecessary here due to the symmetry between
# standard in and standard out.
wrap_output(sys.stdout, encoding=encoding).write(fixed_stdin)
if hash(read_stdin) != hash(fixed_stdin):
if args.exit_code:
return EXIT_CODE_EXISTS_DIFF
else:
if args.in_place or args.diff:
args.files = list(set(args.files))
else:
assert len(args.files) == 1
assert not args.recursive
results = fix_multiple_files(args.files, args, sys.stdout)
if args.diff:
ret = any([len(ret) != 0 for ret in results])
else:
# with in-place option
ret = any([ret is not None for ret in results])
if args.exit_code and ret:
return EXIT_CODE_EXISTS_DIFF
except IOError:
return EXIT_CODE_ERROR
except KeyboardInterrupt:
return EXIT_CODE_ERROR # pragma: no cover
class CachedTokenizer(object):
"""A one-element cache around tokenize.generate_tokens().
Original code written by Ned Batchelder, in coverage.py.
"""
def __init__(self):
self.last_text = None
self.last_tokens = None
def generate_tokens(self, text):
"""A stand-in for tokenize.generate_tokens()."""
if text != self.last_text:
string_io = io.StringIO(text)
self.last_tokens = list(
tokenize.generate_tokens(string_io.readline)
)
self.last_text = text
return self.last_tokens
_cached_tokenizer = CachedTokenizer()
generate_tokens = _cached_tokenizer.generate_tokens
if __name__ == '__main__':
sys.exit(main())