Skip to content

Commit eeb6263

Browse files
asottile authored and myint committed
String formatting linting (#443)
* Add lint rule for f-strings without placeholders
* Add linting for string.format(...)
* Add linting for % formatting
1 parent c0193b2 commit eeb6263

File tree

3 files changed

+593
-3
lines changed

3 files changed

+593
-3
lines changed

pyflakes/checker.py

Lines changed: 344 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import functools
1313
import os
1414
import re
15+
import string
1516
import sys
1617
import tokenize
1718

@@ -29,6 +30,8 @@
2930

3031
builtin_vars = dir(__import__('__builtin__' if PY2 else 'builtins'))
3132

33+
parse_format_string = string.Formatter().parse
34+
3235
if PY2:
3336
tokenize_tokenize = tokenize.generate_tokens
3437
else:
@@ -81,6 +84,87 @@ def getAlternatives(n):
8184
TYPE_FUNC_RE = re.compile(r'^(\(.*?\))\s*->\s*(.*)$')
8285

8386

87+
# Building blocks of one `%` conversion specifier, listed in the order they
# may appear after the `%` character.
# https://docs.python.org/3/library/stdtypes.html#old-string-formatting

# `(name)` mapping key; the capture group is the name without parentheses
MAPPING_KEY_RE = re.compile(r'\(([^()]*)\)')
# any run (possibly empty) of the flag characters `#`, `0`, `+`, space, `-`
CONVERSION_FLAG_RE = re.compile(r'[#0+ -]*')
# minimum field width: a literal `*` or a (possibly empty) run of digits
WIDTH_RE = re.compile(r'(?:\*|\d*)')
# optional precision: `.` followed by `*` or a (possibly empty) run of digits
PRECISION_RE = re.compile(r'(?:\.(?:\*|\d*))?')
# optional length modifier: one of `h`, `l`, `L`
LENGTH_RE = re.compile(r'[hlL]?')
# conversion characters treated as valid (`%` is the literal-percent escape)
VALID_CONVERSIONS = frozenset('diouxXeEfFgGcrsa%')
94+
95+
96+
def _must_match(regex, string, pos):
97+
# type: (Pattern[str], str, int) -> Match[str]
98+
match = regex.match(string, pos)
99+
assert match is not None
100+
return match
101+
102+
103+
def parse_percent_format(s):  # type: (str) -> Tuple[PercentFormat, ...]
    """Parses the string component of a `'...' % ...` format call

    Copied from https://github.com/asottile/pyupgrade at v1.20.1

    Returns a tuple of ``(literal_text, fmt)`` pairs.  ``fmt`` is the tuple
    ``(key, conversion_flag, width, precision, conversion)`` for the
    directive following ``literal_text`` (absent parts are ``None``), or
    ``None`` for trailing literal text after the last directive.

    Raises ``ValueError`` when the string ends in the middle of a directive.
    """

    def _parse_inner():
        # type: () -> Generator[PercentFormat, None, None]
        total = len(s)
        literal_start = 0
        pos = 0

        while pos < total:
            percent = s.find('%', pos)
            if percent == -1:  # no more % fields!
                yield s[literal_start:], None
                return

            literal_end = percent
            pos = percent + 1
            if pos >= total:  # a lone `%` at the very end of the string
                raise ValueError('end-of-string while parsing format')

            key_match = MAPPING_KEY_RE.match(s, pos)
            if key_match:
                key = key_match.group(1)  # type: Optional[str]
                pos = key_match.end()
            else:
                key = None

            # each of these patterns can match the empty string, so the
            # pieces are simply consumed one after another
            flag_match = _must_match(CONVERSION_FLAG_RE, s, pos)
            width_match = _must_match(WIDTH_RE, s, flag_match.end())
            precision_match = _must_match(
                PRECISION_RE, s, width_match.end(),
            )
            # length modifier is ignored
            pos = _must_match(LENGTH_RE, s, precision_match.end()).end()

            try:
                conversion = s[pos]
            except IndexError:
                raise ValueError('end-of-string while parsing format')
            pos += 1

            yield s[literal_start:literal_end], (
                key,
                flag_match.group() or None,
                width_match.group() or None,
                precision_match.group() or None,
                conversion,
            )

            literal_start = pos

    return tuple(_parse_inner())
166+
167+
84168
class _FieldsOrder(dict):
85169
"""Fix order of AST node fields."""
86170

@@ -1241,10 +1325,250 @@ def ignore(self, node):
12411325
PASS = ignore
12421326

12431327
# "expr" type nodes
1244-
BOOLOP = BINOP = UNARYOP = IFEXP = SET = \
1245-
CALL = REPR = ATTRIBUTE = SUBSCRIPT = \
1328+
BOOLOP = UNARYOP = IFEXP = SET = \
1329+
REPR = ATTRIBUTE = SUBSCRIPT = \
12461330
STARRED = NAMECONSTANT = handleChildren
12471331

1332+
def _handle_string_dot_format(self, node):
    """Lint a ``'...'.format(...)`` call made on a string literal.

    Reports an invalid format string, mixing of automatic (``{}``) and
    manual (``{0}``) numbering, arguments that no placeholder uses, and
    placeholders that no argument satisfies.  The argument comparison is
    skipped when the call contains ``*args`` / ``**kwargs``.
    """
    try:
        fields = tuple(parse_format_string(node.func.value.s))
    except ValueError as exc:
        self.report(messages.StringDotFormatInvalidFormat, node, exc)
        return

    # mutable cell shared with the closure (py2-compatible `nonlocal`)
    numbering = {'auto': None, 'next_auto': 0}
    positional_fields = set()
    named_fields = set()

    def _register(field_name):
        """Returns True if there is an error which should early-exit"""
        if field_name is None:  # end of string or `{` / `}` escapes
            return False

        # attributes / indices are allowed in `.format(...)`; only the
        # part before the first `.` / `[` selects the argument
        base = field_name.partition('.')[0].partition('[')[0]

        try:
            base = int(base)
        except ValueError:
            pass
        else:  # explicitly numbered field
            if numbering['auto'] is True:
                self.report(messages.StringDotFormatMixingAutomatic, node)
                return True
            numbering['auto'] = False

        if base == '':  # automatically numbered field
            if numbering['auto'] is False:
                self.report(messages.StringDotFormatMixingAutomatic, node)
                return True
            numbering['auto'] = True

            base = numbering['next_auto']
            numbering['next_auto'] += 1

        if isinstance(base, int):
            positional_fields.add(base)
        else:
            named_fields.add(base)

        return False

    for _, field_name, spec, _ in fields:
        if _register(field_name):
            return

        if spec is None:
            continue

        # spec can also contain format specifiers
        try:
            nested_fields = tuple(parse_format_string(spec))
        except ValueError as exc:
            self.report(messages.StringDotFormatInvalidFormat, node, exc)
            return

        for _, nested_name, nested_spec, _ in nested_fields:
            # can't recurse again
            if nested_spec is not None and '{' in nested_spec:
                self.report(
                    messages.StringDotFormatInvalidFormat,
                    node,
                    'Max string recursion exceeded',
                )
                return
            if _register(nested_name):
                return

    # bail early if there is *args or **kwargs
    starred_type = getattr(ast, 'Starred', ())
    has_splat = (
        # python 2.x *args / **kwargs
        getattr(node, 'starargs', None) or
        getattr(node, 'kwargs', None) or
        # python 3.x *args
        any(isinstance(arg, starred_type) for arg in node.args) or
        # python 3.x **kwargs
        any(kwd.arg is None for kwd in node.keywords)
    )
    if has_splat:
        return

    given_positional = set(range(len(node.args)))
    given_named = {kwd.arg for kwd in node.keywords}

    unused_positional = given_positional - positional_fields
    unused_named = given_named - named_fields
    unsatisfied = (
        (positional_fields | named_fields) -
        (given_positional | given_named)
    )

    if unused_positional:
        self.report(
            messages.StringDotFormatExtraPositionalArguments,
            node,
            ', '.join(sorted(str(x) for x in unused_positional)),
        )
    if unused_named:
        self.report(
            messages.StringDotFormatExtraNamedArguments,
            node,
            ', '.join(sorted(unused_named)),
        )
    if unsatisfied:
        self.report(
            messages.StringDotFormatMissingArgument,
            node,
            ', '.join(sorted(str(x) for x in unsatisfied)),
        )
1451+
1452+
def CALL(self, node):
    """Visit a call; lint it first when it is ``'<literal>'.format(...)``."""
    func = node.func
    is_literal_format = (
        isinstance(func, ast.Attribute) and
        isinstance(func.value, ast.Str) and
        func.attr == 'format'
    )
    if is_literal_format:
        self._handle_string_dot_format(node)
    # children are always visited, whether or not the call was linted
    self.handleChildren(node)
1460+
1461+
def _handle_percent_format(self, node):
    """Lint a ``'...' % ...`` expression whose left operand is a literal.

    The literal is parsed with ``parse_percent_format``; the placeholders
    are then compared against the right operand when that operand is a
    tuple / list display without splats or a dict display with only
    string-literal keys.  Reports: incomplete format, unsupported
    conversion character, ``*`` width/precision with named placeholders,
    mixed positional and named placeholders, positional count mismatch,
    wrong operand kind (sequence vs. mapping), and extra / missing keys.
    """
    try:
        placeholders = parse_percent_format(node.left.s)
    except ValueError:
        self.report(
            messages.PercentFormatInvalidFormat,
            node,
            'incomplete format',
        )
        return

    named = set()
    positional_count = 0
    # None until the first real placeholder decides the style:
    # True for positional (`%s`), False for named (`%(name)s`)
    positional = None
    for _, placeholder in placeholders:
        if placeholder is None:  # trailing literal text
            continue
        name, _, width, precision, conversion = placeholder

        if conversion == '%':  # `%%` escape consumes no argument
            continue

        if conversion not in VALID_CONVERSIONS:
            self.report(
                messages.PercentFormatUnsupportedFormatCharacter,
                node,
                conversion,
            )

        if positional is None and conversion:
            positional = name is None

        # a `*` width / precision consumes one extra positional argument
        for part in (width, precision):
            if part is not None and '*' in part:
                if not positional:
                    self.report(
                        messages.PercentFormatStarRequiresSequence,
                        node,
                    )
                else:
                    positional_count += 1

        if positional and name is not None:
            self.report(
                messages.PercentFormatMixedPositionalAndNamed,
                node,
            )
            return
        elif not positional and name is None:
            self.report(
                messages.PercentFormatMixedPositionalAndNamed,
                node,
            )
            return

        if positional:
            positional_count += 1
        else:
            named.add(name)

    rhs = node.right

    if (
            isinstance(rhs, (ast.List, ast.Tuple)) and
            # does not have any *splats (py35+ feature)
            not any(
                isinstance(elt, getattr(ast, 'Starred', ()))
                for elt in rhs.elts
            )
    ):
        if isinstance(rhs, ast.Tuple):
            substitution_count = len(rhs.elts)
        else:
            # only a tuple is unpacked into several values by `%`; a list
            # is a single value (`'%s' % [1, 2]` formats the whole list)
            substitution_count = 1

        if positional and positional_count != substitution_count:
            self.report(
                messages.PercentFormatPositionalCountMismatch,
                node,
                positional_count,
                substitution_count,
            )
        elif not positional:
            self.report(messages.PercentFormatExpectedMapping, node)

    if (
            isinstance(rhs, ast.Dict) and
            all(isinstance(k, ast.Str) for k in rhs.keys)
    ):
        # a dict may still be the single value of one positional placeholder
        if positional and positional_count > 1:
            self.report(messages.PercentFormatExpectedSequence, node)
            return

        substitution_keys = {k.s for k in rhs.keys}
        extra_keys = substitution_keys - named
        missing_keys = named - substitution_keys
        if not positional and extra_keys:
            self.report(
                messages.PercentFormatExtraNamedArguments,
                node,
                ', '.join(sorted(extra_keys)),
            )
        if not positional and missing_keys:
            self.report(
                messages.PercentFormatMissingArgument,
                node,
                ', '.join(sorted(missing_keys)),
            )
1563+
1564+
def BINOP(self, node):
    """Visit a binary operation; lint it first when it is ``'<literal>' % x``."""
    is_literal_percent = (
        isinstance(node.op, ast.Mod) and
        isinstance(node.left, ast.Str)
    )
    if is_literal_percent:
        self._handle_percent_format(node)
    # children are always visited, whether or not the operation was linted
    self.handleChildren(node)
1571+
12481572
NUM = STR = BYTES = ELLIPSIS = CONSTANT = ignore
12491573

12501574
# "slice" type nodes
@@ -1273,7 +1597,24 @@ def RAISE(self, node):
12731597
self.report(messages.RaiseNotImplemented, node)
12741598

12751599
# additional node types
1276-
COMPREHENSION = KEYWORD = FORMATTEDVALUE = JOINEDSTR = handleChildren
1600+
COMPREHENSION = KEYWORD = FORMATTEDVALUE = handleChildren
1601+
1602+
# True while the children of an f-string are being visited
_in_fstring = False


def JOINEDSTR(self, node):
    """Visit an f-string, reporting one that has no placeholders.

    No report is made while already inside another f-string: the
    conversion / etc. flags are parsed as f-strings without placeholders.
    """
    has_placeholder = any(
        isinstance(value, ast.FormattedValue) for value in node.values
    )
    if not (self._in_fstring or has_placeholder):
        self.report(messages.FStringMissingPlaceholders, node)

    # mark the nested visit, restoring the previous flag even on error
    previous = self._in_fstring
    self._in_fstring = True
    try:
        self.handleChildren(node)
    finally:
        self._in_fstring = previous
12771618

12781619
def DICT(self, node):
12791620
# Complain if there are duplicate keys with different values

0 commit comments

Comments
 (0)