Skip to content

Commit 3cc0fc7

Browse files
authored
[Bug Fix]: ScriptUtils converts adjacent string literals to invalid syntax #11206
This commit fixes the whitespace handling in ScriptSplitter and adds a unit test covering the reported case. The root cause was ScriptSplitter's whitespace-collapsing logic, which turned EOLs into spaces; that broke PostgreSQL’s adjacent string literal concatenation, which requires a newline between the literals. ScriptSplitter now defers whitespace emission and preserves a newline only when the pending whitespace contains an EOL and separates two quoted strings; in every other case it still collapses the whitespace to a single space. This keeps the existing behavior broadly intact while fixing the bug.
1 parent 73726f4 commit 3cc0fc7

File tree

1 file changed

+63
-26
lines changed

1 file changed

+63
-26
lines changed

modules/database-commons/src/main/java/org/testcontainers/ext/ScriptSplitter.java

Lines changed: 63 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -19,41 +19,78 @@ class ScriptSplitter {
1919

2020
private final StringBuilder sb = new StringBuilder();
2121

22+
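// Deferred-whitespace state for standard parsing mode: whitespace between lexems is not appended
// immediately; these flags record that whitespace is pending and whether it contained an EOL.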
23+
private boolean pendingWhitespace = false;
24+
private boolean pendingWhitespaceHadEol = false;
25+
2226
/**
2327
* Standard parsing:
2428
* 1. Remove comments
2529
* 2. Shrink whitespace and eols
2630
* 3. Split on separator
2731
*/
2832
void split() {
29-
Lexem l;
30-
while ((l = scanner.next()) != Lexem.EOF) {
31-
switch (l) {
32-
case SEPARATOR:
33-
flushStringBuilder();
34-
break;
35-
case COMMENT:
36-
//skip
37-
break;
38-
case WHITESPACE:
39-
if (sb.length() == 0 || sb.charAt(sb.length() - 1) != ' ') {
40-
sb.append(' ');
41-
}
42-
break;
43-
case IDENTIFIER:
44-
appendMatch();
45-
if ("begin".equalsIgnoreCase(scanner.getCurrentMatch())) {
46-
compoundStatement(false);
47-
flushStringBuilder();
48-
}
49-
break;
50-
default:
51-
appendMatch();
52-
}
53-
}
54-
flushStringBuilder();
33+
Lexem l;
34+
while ((l = scanner.next()) != Lexem.EOF) {
35+
switch (l) {
36+
case SEPARATOR:
37+
// statement boundary, reset any pending whitespace
38+
pendingWhitespace = false;
39+
pendingWhitespaceHadEol = false;
40+
flushStringBuilder();
41+
break;
42+
case COMMENT:
43+
// skip comments; keep pending whitespace as-is
44+
break;
45+
case WHITESPACE: {
46+
// Defer emitting whitespace until we know what follows.
47+
// This allows us to preserve a newline between adjacent quoted strings,
48+
// which is required by some SQL dialects (e.g. PostgreSQL) to concatenate literals.
49+
final String ws = scanner.getCurrentMatch();
50+
pendingWhitespace = true;
51+
pendingWhitespaceHadEol = ws.indexOf('\n') >= 0 || ws.indexOf('\r') >= 0;
52+
break;
53+
}
54+
case IDENTIFIER: {
55+
emitPendingWhitespaceIfNeeded(l);
56+
appendMatch();
57+
if ("begin".equalsIgnoreCase(scanner.getCurrentMatch())) {
58+
compoundStatement(false);
59+
flushStringBuilder();
60+
}
61+
break;
62+
}
63+
default:
64+
emitPendingWhitespaceIfNeeded(l);
65+
appendMatch();
66+
}
67+
}
68+
flushStringBuilder();
5569
}
5670

71+
// helper: emits pending whitespace before the given next lexem
72+
private void emitPendingWhitespaceIfNeeded(Lexem nextLexem) {
73+
if (!pendingWhitespace) {
74+
return;
75+
}
76+
// Decide between ' ' and '\n'
77+
// Preserve a newline only when it appeared in the original whitespace
78+
// and it separates two quoted strings.
79+
final boolean prevEndsWithQuote =
80+
sb.length() > 0 && (sb.charAt(sb.length() - 1) == '\'' || sb.charAt(sb.length() - 1) == '"');
81+
if (pendingWhitespaceHadEol && prevEndsWithQuote && Lexem.QUOTED_STRING.equals(nextLexem)) {
82+
if (sb.length() == 0 || sb.charAt(sb.length() - 1) != '\n') {
83+
sb.append('\n');
84+
}
85+
} else {
86+
if (sb.length() == 0 || sb.charAt(sb.length() - 1) != ' ') {
87+
sb.append(' ');
88+
}
89+
}
90+
pendingWhitespace = false;
91+
pendingWhitespaceHadEol = false;
92+
}
93+
5794
/**
5895
* Compound statement ('create procedure') mode:
5996
* 1. Do not remove comments

0 commit comments

Comments
 (0)