Skip to content

Commit 0478d39

Browse files
authored
SONARPY-1002: [Second Attempt] Fix FP on S2275 for nested replacement fields with format specifiers (#2021)
* SONARPY-1002 Fix FP on S2275 for nested replacement fields with format specifiers * SONARPY-1002 small refactoring * SONARPY-1002 convert to switch expression * SONARPY-1002 rename function
1 parent 3ce62c5 commit 0478d39

File tree

3 files changed

+164
-99
lines changed

3 files changed

+164
-99
lines changed

python-checks/src/main/java/org/sonar/python/checks/StringFormat.java

Lines changed: 150 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -172,76 +172,47 @@ public boolean hasNamedFields() {
172172
}
173173

174174
private enum ParseState {
175-
INIT, LCURLY, RCURLY, FIELD, FLAG, FLAG_CHARACTER, FORMAT, FORMAT_LCURLY, FORMAT_FIELD
175+
INIT, RCURLY
176176
}
177-
178177
private static class StrFormatParser {
179178
private boolean hasManualNumbering = false;
180179
private boolean hasAutoNumbering = false;
181180
private int autoNumberingPos = 0;
182181

183-
private String currentFieldName = null;
184-
private String nestedFieldName = null;
185182
private ParseState state = ParseState.INIT;
186183
private int nesting = 0;
187184
private List<ReplacementField> result;
188185

189186
private Consumer<String> issueReporter;
190187
private String value;
191-
private Matcher fieldContentMatcher;
188+
private int pos = 0;
192189

193190
public StrFormatParser(Consumer<String> issueReporter, String value) {
194191
this.issueReporter = issueReporter;
195192
this.value = value;
196-
this.fieldContentMatcher = FORMAT_FIELD_PATTERN.matcher(this.value);
193+
this.pos = 0;
197194
}
198195

196+
199197
public Optional<StringFormat> parse() {
200-
int pos = 0;
198+
pos = 0;
201199
result = new ArrayList<>();
202200

203201
while (pos < value.length()) {
204202
char current = value.charAt(pos);
205203
switch (state) {
206-
case INIT:
207-
pos = parseInitial(current, pos);
208-
break;
209-
case LCURLY:
210-
pos = parseFieldName(current, pos);
211-
break;
212-
case FIELD:
213-
if (!tryParseField(current)) {
204+
case INIT -> {
205+
if (!tryParsingInitial(current)) {
214206
return Optional.empty();
215207
}
216-
break;
217-
case RCURLY:
218-
if (current == '}') {
219-
state = ParseState.INIT;
220-
}
221-
break;
222-
case FLAG:
223-
if (FORMAT_VALID_CONVERSION_FLAGS.indexOf(current) == -1) {
224-
issueReporter.accept(String.format("Fix this formatted string's syntax; !%c is not a valid conversion flag.", current));
208+
}
209+
case RCURLY -> {
210+
if (current != '}') {
211+
issueReporter.accept(SYNTAX_ERROR_MESSAGE);
225212
return Optional.empty();
226213
}
227-
state = ParseState.FLAG_CHARACTER;
228-
break;
229-
case FLAG_CHARACTER:
230-
if (!tryParseFlagCharacter(current)) {
231-
return Optional.empty();
232-
}
233-
break;
234-
case FORMAT:
235-
parseFormatSpecifier(current);
236-
break;
237-
case FORMAT_LCURLY:
238-
pos = parseFormatCurly(pos);
239-
break;
240-
case FORMAT_FIELD:
241-
if (!tryParseFormatSpecifierField(current)) {
242-
return Optional.empty();
243-
}
244-
break;
214+
state = ParseState.INIT;
215+
}
245216
}
246217

247218
pos += 1;
@@ -269,31 +240,9 @@ private boolean checkParserState() {
269240
return true;
270241
}
271242

272-
private boolean tryParseFormatSpecifierField(char current) {
273-
if (current != '}') {
274-
issueReporter.accept(SYNTAX_ERROR_MESSAGE);
275-
return false;
276-
}
277-
278-
result.add(createField(nestedFieldName));
279-
nesting--;
280-
state = ParseState.FORMAT;
281-
return true;
282-
}
283-
284-
private int parseFormatCurly(int pos) {
285-
if (fieldContentMatcher.region(pos, value.length()).find()) {
286-
// This should always match (if nothing else, an empty string), but be defensive
287-
state = ParseState.FORMAT_FIELD;
288-
nestedFieldName = fieldContentMatcher.group("name");
289-
pos = fieldContentMatcher.end() - 1;
290-
}
291-
return pos;
292-
}
293-
294-
private int parseInitial(char current, int pos) {
243+
private boolean tryParsingInitial(char current) {
295244
if (current == '{') {
296-
state = ParseState.LCURLY;
245+
return tryParsingField();
297246
} else if (current == '}') {
298247
state = ParseState.RCURLY;
299248
} else if (current == '\\') {
@@ -304,29 +253,134 @@ private int parseInitial(char current, int pos) {
304253
}
305254
}
306255

256+
return true;
257+
}
258+
259+
private boolean tryParsingField() {
260+
FieldParser fieldParser = new FieldParser(this, value.substring(pos), 0);
261+
boolean successful = fieldParser.tryParse();
262+
this.pos += fieldParser.getPos() - 1;
263+
return successful;
264+
}
265+
266+
267+
public void reportIssue(String issue) {
268+
issueReporter.accept(issue);
269+
}
270+
271+
public void addField(@Nullable String name) {
272+
result.add(createField(name));
273+
}
274+
275+
private ReplacementField createField(@Nullable String name) {
276+
if (name == null) {
277+
hasAutoNumbering = true;
278+
int currentPos = autoNumberingPos;
279+
autoNumberingPos++;
280+
return new PositionalField(DO_NOTHING_VALIDATOR, currentPos);
281+
} else if (FORMAT_NUMBER_PATTERN.matcher(name).find()) {
282+
hasManualNumbering = true;
283+
return new PositionalField(DO_NOTHING_VALIDATOR, Integer.parseInt(name));
284+
} else {
285+
return new NamedField(DO_NOTHING_VALIDATOR, name);
286+
}
287+
}
288+
289+
}
290+
291+
private enum FieldParseState {
292+
LCURLY, FIELD, FLAG, FLAG_CHARACTER, FORMAT, FINISHED
293+
}
294+
private static class FieldParser {
295+
private StrFormatParser parent;
296+
297+
private String currentFieldName = null;
298+
private FieldParseState state = FieldParseState.LCURLY;
299+
private int nesting;
300+
301+
private String value;
302+
private int pos;
303+
private Matcher fieldContentMatcher;
304+
305+
public FieldParser(StrFormatParser parent, String value, int nesting) {
306+
this.parent = parent;
307+
this.value = value;
308+
this.pos = 1;
309+
this.fieldContentMatcher = FORMAT_FIELD_PATTERN.matcher(this.value);
310+
this.nesting = nesting;
311+
}
312+
313+
public int getPos() {
307314
return pos;
308315
}
309316

310-
private void parseFormatSpecifier(char current) {
317+
public boolean tryParse() {
318+
pos = 1;
319+
320+
while (pos < value.length() && state != FieldParseState.FINISHED) {
321+
char current = value.charAt(pos);
322+
boolean successful = switch (state) {
323+
case LCURLY -> {
324+
pos = parseFieldName(current, pos);
325+
yield true;
326+
}
327+
case FIELD -> tryParseField(current);
328+
case FLAG -> tryParseFlag(current);
329+
case FLAG_CHARACTER -> tryParseFlagCharacter(current);
330+
case FORMAT -> tryParseFormatSpecifier(current);
331+
case FINISHED -> throw new IllegalStateException("Unexpected value: " + state);
332+
};
333+
334+
if(!successful) {
335+
return false;
336+
}
337+
338+
pos += 1;
339+
}
340+
341+
return checkParserState();
342+
}
343+
344+
private boolean checkParserState() {
345+
if(state != FieldParseState.FINISHED) {
346+
parent.reportIssue(SYNTAX_ERROR_MESSAGE);
347+
return false;
348+
}
349+
return true;
350+
}
351+
352+
private boolean tryParseFormatSpecifier(char current) {
311353
if (current == '{') {
312-
nesting++;
313-
state = ParseState.FORMAT_LCURLY;
354+
if(!tryParsingNestedField()) {
355+
return false;
356+
}
314357
} else if (current == '}') {
315-
result.add(createField(currentFieldName));
316-
nesting--;
317-
state = ParseState.INIT;
358+
addCurrentField();
359+
state = FieldParseState.FINISHED;
318360
}
361+
return true;
362+
}
363+
364+
private boolean tryParsingNestedField() {
365+
if(this.nesting > 0) {
366+
parent.reportIssue("Fix this formatted string's syntax; Deep nesting is not allowed.");
367+
return false;
368+
}
369+
FieldParser fieldParser = new FieldParser(parent, value.substring(pos), this.nesting + 1);
370+
371+
boolean successful = fieldParser.tryParse();
372+
this.pos += fieldParser.getPos() - 1;
373+
return successful;
319374
}
320375

321376
private boolean tryParseFlagCharacter(char current) {
322377
if (current == ':') {
323-
state = ParseState.FORMAT;
378+
state = FieldParseState.FORMAT;
324379
} else if (current == '}') {
325-
result.add(createField(currentFieldName));
326-
nesting--;
327-
state = ParseState.INIT;
380+
addCurrentField();
381+
state = FieldParseState.FINISHED;
328382
} else {
329-
issueReporter.accept(SYNTAX_ERROR_MESSAGE);
383+
parent.reportIssue(SYNTAX_ERROR_MESSAGE);
330384
return false;
331385
}
332386

@@ -335,26 +389,33 @@ private boolean tryParseFlagCharacter(char current) {
335389

336390
private boolean tryParseField(char current) {
337391
if (current == '!') {
338-
state = ParseState.FLAG;
392+
state = FieldParseState.FLAG;
339393
} else if (current == ':') {
340-
state = ParseState.FORMAT;
394+
state = FieldParseState.FORMAT;
341395
} else if (current == '}') {
342-
nesting--;
343-
result.add(createField(currentFieldName));
344-
state = ParseState.INIT;
396+
addCurrentField();
397+
state = FieldParseState.FINISHED;
345398
} else {
346-
issueReporter.accept(SYNTAX_ERROR_MESSAGE);
399+
parent.reportIssue(SYNTAX_ERROR_MESSAGE);
347400
return false;
348401
}
349402
return true;
350403
}
351404

405+
private boolean tryParseFlag(char current) {
406+
if (FORMAT_VALID_CONVERSION_FLAGS.indexOf(current) == -1) {
407+
parent.reportIssue(String.format("Fix this formatted string's syntax; !%c is not a valid conversion flag.", current));
408+
return false;
409+
}
410+
state = FieldParseState.FLAG_CHARACTER;
411+
return true;
412+
}
413+
352414
private int parseFieldName(char current, int pos) {
353415
if (current == '{') {
354-
state = ParseState.INIT;
416+
state = FieldParseState.FINISHED;
355417
} else {
356-
state = ParseState.FIELD;
357-
nesting++;
418+
state = FieldParseState.FIELD;
358419
if (fieldContentMatcher.region(pos, value.length()).find()) {
359420
// This should always match (if nothing else, an empty string), but be defensive
360421
currentFieldName = fieldContentMatcher.group("name");
@@ -365,21 +426,12 @@ private int parseFieldName(char current, int pos) {
365426
return pos;
366427
}
367428

368-
private ReplacementField createField(@Nullable String name) {
369-
if (name == null) {
370-
hasAutoNumbering = true;
371-
int currentPos = autoNumberingPos;
372-
autoNumberingPos++;
373-
return new PositionalField(DO_NOTHING_VALIDATOR, currentPos);
374-
} else if (FORMAT_NUMBER_PATTERN.matcher(name).find()) {
375-
hasManualNumbering = true;
376-
return new PositionalField(DO_NOTHING_VALIDATOR, Integer.parseInt(name));
377-
} else {
378-
return new NamedField(DO_NOTHING_VALIDATOR, name);
379-
}
429+
private void addCurrentField() {
430+
parent.addField(currentFieldName);
380431
}
381432
}
382433

434+
383435
public static Optional<StringFormat> createFromStrFormatStyle(Consumer<String> issueReporter, String value) {
384436
// Format -> '{' [FieldName] ['!' Conversion] [':' FormatSpec*] '}'
385437
// FormatSpec -> '{' [FieldName] '}' | Character

python-checks/src/test/java/org/sonar/python/checks/StringFormatMisuseCheckTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,4 +33,4 @@ void test_printf_style() {
3333
void test_str_format_style() {
3434
PythonCheckVerifier.verify("src/test/resources/checks/stringFormatMisuseStrFormat.py", new StringFormatMisuseCheck());
3535
}
36-
}
36+
}

python-checks/src/test/resources/checks/stringFormatMisuseStrFormat.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ def __getitem__(self, key):
66
return 42
77

88
def arguments():
9+
'{}'.format('one') # Ok
910
'{a:{b[[]]}}'.format(a=3.14, b={'[': 10}) # Noncompliant {{Fix this formatted string's syntax.}}
1011
'{:{}} {:{}}'.format('one', 'two', 'three') # Noncompliant
1112
'{} {} {}'.format('one') # Noncompliant {{Provide a value for field(s) with index 1, 2.}}
@@ -27,6 +28,10 @@ def arguments():
2728
def format_syntax():
2829
"{0".format(1) # Noncompliant {{Fix this formatted string's syntax.}}
2930
#^^^^
31+
"{{{".format(1) # Noncompliant
32+
"}}}".format(1) # Noncompliant
33+
"}}".format(1) # Ok
34+
"}} ".format(1) # Ok
3035
"0}".format(1) # Noncompliant
3136
"{[".format() # Noncompliant
3237
"{a[]]}".format(a=0) # Noncompliant
@@ -69,6 +74,14 @@ def nested_format():
6974
'{a:{0}{1}{b}}{1:{2}{2}}'.format('one', 'two', a='a', b='b') # Noncompliant {{Provide a value for field(s) with index 2.}}
7075
'{a:{b}{c}{d}}{:{}{e}}'.format('one', 'two', a='a', b='b', c='c', d='d', e='e') # OK
7176
'{a:{0}{1}{b}}{0:{2}{2}}'.format('one', 'two', 'three', a='a', b='b') # OK
77+
"{value:0.{digits:{nested}}}".format(value=1.234, digits=3, nested=123) # Noncompliant {{Fix this formatted string's syntax; Deep nesting is not allowed.}}
78+
"{value:0.{digits!z}}".format(value=1.234, digits=3) # Noncompliant {{Fix this formatted string's syntax; !z is not a valid conversion flag.}}
79+
"{value:0.{digits!{nested}}}".format(value=1.234, digits=3) # Noncompliant {{Fix this formatted string's syntax; !{ is not a valid conversion flag.}}
80+
"{value:0.{digits:d}}".format(value=1.234, digits=3) # OK
81+
"{value:0.{digits:d}}".format(value=1.234, digits=3) # OK
82+
"{value:0.{digits!a{nested}}}".format(value=1.234, digits=3) # Noncompliant {{Fix this formatted string's syntax.}}
83+
"{value:0.{digits!a:<}}".format(value=1.234, digits=3) # OK
84+
"{value:0.{digits!a}}".format(value=1.234, digits=3) # OK
7285

7386
def other():
7487
f1 = '{} {} {} {}'

0 commit comments

Comments
 (0)