Skip to content

Commit 89ab7d6

Browse files
han-yan01facebook-github-bot
authored andcommitted
[presto][verifier] Replace regex-based json_parse safety wrapper with AST-level rewriter (#27202)
Summary: testing infra fix: ``` com.facebook.presto.verifier.framework.PrestoQueryException: com.facebook.presto.spi.PrestoException: Cannot convert '' to JSON ``` The previous applyJsonParseSafetyWrapper() used SqlFormatter.formatSql() -> regex -> sqlParser.createStatement() round-trip that silently failed on complex queries with lambdas and $internal$try(BIND(...)) patterns, leaving bare json_parse() calls that crash on empty strings during verifier replay. Replace with AST-level JsonParseTryWrapper using DefaultTreeRewriter + ExpressionTreeRewriter<Boolean> (same pattern as FunctionCallRewriter). Wraps json_parse FunctionCall nodes directly in TryExpression on the AST, eliminating the fragile format/re-parse cycle. The feature is gated behind a `json-parse-safety-wrapper-enabled` config flag (default false). Enabled in sapphire velox shadow testing via run-shadow-test.sh. # Release Notes ``` == NO RELEASE NOTE == ``` Differential Revision: D94175149
1 parent ee7c62a commit 89ab7d6

File tree

4 files changed

+203
-27
lines changed

4 files changed

+203
-27
lines changed

presto-verifier/src/main/java/com/facebook/presto/verifier/framework/VerifierConfig.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ public class VerifierConfig
7070
private boolean extendedVerification;
7171
private String runningMode = CONTROL_TEST_MODE;
7272

73+
private boolean jsonParseSafetyWrapperEnabled;
74+
7375
private Multimap<String, FunctionCallSubstitute> functionSubstitutes = ImmutableMultimap.of();
7476

7577
@NotNull
@@ -455,6 +457,19 @@ public VerifierConfig setRunningMode(String runningMode)
455457
return this;
456458
}
457459

460+
public boolean isJsonParseSafetyWrapperEnabled()
461+
{
462+
return jsonParseSafetyWrapperEnabled;
463+
}
464+
465+
@ConfigDescription("Wrap bare json_parse() calls with TRY() during query rewriting to prevent failures on malformed JSON")
466+
@Config("json-parse-safety-wrapper-enabled")
467+
public VerifierConfig setJsonParseSafetyWrapperEnabled(boolean jsonParseSafetyWrapperEnabled)
468+
{
469+
this.jsonParseSafetyWrapperEnabled = jsonParseSafetyWrapperEnabled;
470+
return this;
471+
}
472+
458473
public Multimap<String, FunctionCallSubstitute> getFunctionSubstitutes()
459474
{
460475
return functionSubstitutes;

presto-verifier/src/main/java/com/facebook/presto/verifier/rewrite/QueryRewriter.java

Lines changed: 82 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
import com.facebook.presto.common.type.TypeManager;
2424
import com.facebook.presto.common.type.TypeSignature;
2525
import com.facebook.presto.common.type.TypeSignatureParameter;
26-
import com.facebook.presto.sql.SqlFormatter;
2726
import com.facebook.presto.sql.parser.SqlParser;
2827
import com.facebook.presto.sql.planner.LiteralEncoder;
2928
import com.facebook.presto.sql.tree.AllColumns;
@@ -36,12 +35,15 @@
3635
import com.facebook.presto.sql.tree.DropTable;
3736
import com.facebook.presto.sql.tree.DropView;
3837
import com.facebook.presto.sql.tree.Expression;
38+
import com.facebook.presto.sql.tree.ExpressionRewriter;
39+
import com.facebook.presto.sql.tree.ExpressionTreeRewriter;
3940
import com.facebook.presto.sql.tree.FunctionCall;
4041
import com.facebook.presto.sql.tree.Identifier;
4142
import com.facebook.presto.sql.tree.Insert;
4243
import com.facebook.presto.sql.tree.IsNullPredicate;
4344
import com.facebook.presto.sql.tree.LikeClause;
4445
import com.facebook.presto.sql.tree.LogicalBinaryExpression;
46+
import com.facebook.presto.sql.tree.Node;
4547
import com.facebook.presto.sql.tree.Property;
4648
import com.facebook.presto.sql.tree.QualifiedName;
4749
import com.facebook.presto.sql.tree.Query;
@@ -51,9 +53,10 @@
5153
import com.facebook.presto.sql.tree.ShowCreate;
5254
import com.facebook.presto.sql.tree.SingleColumn;
5355
import com.facebook.presto.sql.tree.Statement;
56+
import com.facebook.presto.sql.tree.SubqueryExpression;
57+
import com.facebook.presto.sql.tree.TryExpression;
5458
import com.facebook.presto.verifier.framework.ClusterType;
5559
import com.facebook.presto.verifier.framework.Column;
56-
import com.facebook.presto.verifier.framework.JsonParseSafetyWrapper;
5760
import com.facebook.presto.verifier.framework.QueryConfiguration;
5861
import com.facebook.presto.verifier.framework.QueryException;
5962
import com.facebook.presto.verifier.framework.QueryObjectBundle;
@@ -111,8 +114,6 @@
111114

112115
public class QueryRewriter
113116
{
114-
private static final com.facebook.airlift.log.Logger log = com.facebook.airlift.log.Logger.get(QueryRewriter.class);
115-
116117
private final SqlParser sqlParser;
117118
private final TypeManager typeManager;
118119
private final BlockEncodingSerde blockEncodingSerde;
@@ -121,6 +122,7 @@ public class QueryRewriter
121122
private final Map<ClusterType, List<Property>> tableProperties;
122123
private final Map<ClusterType, Boolean> reuseTables;
123124
private final Optional<FunctionCallRewriter> functionCallRewriter;
125+
private final boolean jsonParseSafetyWrapperEnabled;
124126

125127
public QueryRewriter(
126128
SqlParser sqlParser,
@@ -131,7 +133,7 @@ public QueryRewriter(
131133
Map<ClusterType, List<Property>> tableProperties,
132134
Map<ClusterType, Boolean> reuseTables)
133135
{
134-
this(sqlParser, typeManager, blockEncodingSerde, prestoAction, tablePrefixes, tableProperties, reuseTables, ImmutableMultimap.of());
136+
this(sqlParser, typeManager, blockEncodingSerde, prestoAction, tablePrefixes, tableProperties, reuseTables, ImmutableMultimap.of(), false);
135137
}
136138

137139
public QueryRewriter(
@@ -143,6 +145,20 @@ public QueryRewriter(
143145
Map<ClusterType, List<Property>> tableProperties,
144146
Map<ClusterType, Boolean> reuseTables,
145147
Multimap<String, FunctionCallSubstitute> functionSubstitutes)
148+
{
149+
this(sqlParser, typeManager, blockEncodingSerde, prestoAction, tablePrefixes, tableProperties, reuseTables, functionSubstitutes, false);
150+
}
151+
152+
public QueryRewriter(
153+
SqlParser sqlParser,
154+
TypeManager typeManager,
155+
BlockEncodingSerde blockEncodingSerde,
156+
PrestoAction prestoAction,
157+
Map<ClusterType, QualifiedName> tablePrefixes,
158+
Map<ClusterType, List<Property>> tableProperties,
159+
Map<ClusterType, Boolean> reuseTables,
160+
Multimap<String, FunctionCallSubstitute> functionSubstitutes,
161+
boolean jsonParseSafetyWrapperEnabled)
146162
{
147163
this.sqlParser = requireNonNull(sqlParser, "sqlParser is null");
148164
this.typeManager = requireNonNull(typeManager, "typeManager is null");
@@ -152,26 +168,64 @@ public QueryRewriter(
152168
this.tableProperties = ImmutableMap.copyOf(tableProperties);
153169
this.reuseTables = ImmutableMap.copyOf(reuseTables);
154170
this.functionCallRewriter = FunctionCallRewriter.getInstance(functionSubstitutes, typeManager);
171+
this.jsonParseSafetyWrapperEnabled = jsonParseSafetyWrapperEnabled;
155172
}
156173

157174
/**
158-
* Helper method to apply json_parse safety wrapper to a query.
159-
* Wraps json_parse() calls with TRY() to handle malformed JSON gracefully.
160-
* If re-parsing fails, returns the original query unchanged.
175+
* Wraps bare json_parse() calls with TRY() using AST rewriting.
176+
* Operates directly on the AST to avoid fragile SQL format/re-parse round-trips
177+
* that silently fail on complex queries with lambdas or internal function patterns.
161178
*/
162-
private Query applyJsonParseSafetyWrapper(Query query)
179+
private static Query applyJsonParseSafetyWrapper(Query query)
163180
{
164-
try {
165-
String sql = SqlFormatter.formatSql(query, Optional.empty());
166-
String fixedSql = JsonParseSafetyWrapper.wrapUnsafeJsonParse(sql);
167-
if (!sql.equals(fixedSql)) {
168-
return (Query) sqlParser.createStatement(fixedSql, PARSING_OPTIONS);
169-
}
170-
}
171-
catch (Exception e) {
172-
log.warn(e, "Failed to apply json_parse safety wrapper, using original query");
181+
return (Query) new JsonParseTryWrapper().process(query, null);
182+
}
183+
184+
/**
185+
* AST-level rewriter that wraps json_parse() FunctionCall nodes in TryExpression.
186+
* Uses the same DefaultTreeRewriter + ExpressionTreeRewriter pattern as FunctionCallRewriter.
187+
*/
188+
private static class JsonParseTryWrapper
189+
extends DefaultTreeRewriter<Void>
190+
{
191+
@Override
192+
protected Node visitExpression(Expression node, Void context)
193+
{
194+
JsonParseTryWrapper statementRewriter = this;
195+
196+
return ExpressionTreeRewriter.rewriteWith(new ExpressionRewriter<Boolean>()
197+
{
198+
@Override
199+
public Expression rewriteFunctionCall(FunctionCall original, Boolean insideTry, ExpressionTreeRewriter<Boolean> treeRewriter)
200+
{
201+
FunctionCall defaultRewrite = treeRewriter.defaultRewrite(original, false);
202+
if (defaultRewrite.getName().getSuffix().equalsIgnoreCase("json_parse") && !Boolean.TRUE.equals(insideTry)) {
203+
return new TryExpression(defaultRewrite);
204+
}
205+
return defaultRewrite;
206+
}
207+
208+
@Override
209+
public Expression rewriteTryExpression(TryExpression original, Boolean insideTry, ExpressionTreeRewriter<Boolean> treeRewriter)
210+
{
211+
Expression inner = treeRewriter.rewrite(original.getInnerExpression(), true);
212+
if (inner != original.getInnerExpression()) {
213+
return new TryExpression(inner);
214+
}
215+
return original;
216+
}
217+
218+
@Override
219+
public Expression rewriteSubqueryExpression(SubqueryExpression expression, Boolean insideTry, ExpressionTreeRewriter<Boolean> treeRewriter)
220+
{
221+
Node rewritten = statementRewriter.process(expression.getQuery(), null);
222+
if (expression.getQuery() == rewritten) {
223+
return expression;
224+
}
225+
return new SubqueryExpression((Query) rewritten);
226+
}
227+
}, node, false);
173228
}
174-
return query;
175229
}
176230

177231
public QueryObjectBundle rewriteQuery(@Language("SQL") String query, QueryConfiguration queryConfiguration, ClusterType clusterType)
@@ -197,8 +251,9 @@ public QueryObjectBundle rewriteQuery(@Language("SQL") String query, QueryConfig
197251
FunctionCallRewriter.RewriterResult rewriterResult = functionCallRewriter.get().rewrite(createQuery);
198252
createQuery = (Query) rewriterResult.getRewrittenNode();
199253
functionSubstitutions = rewriterResult.getSubstitutions();
200-
201-
// Apply safety wrapper for json_parse() calls to prevent failures on malformed JSON
254+
}
255+
// Apply safety wrapper for json_parse() calls to prevent failures on malformed JSON
256+
if (jsonParseSafetyWrapperEnabled) {
202257
createQuery = applyJsonParseSafetyWrapper(createQuery);
203258
}
204259
if (shouldReuseTable && !functionSubstitutions.isPresent()) {
@@ -244,8 +299,9 @@ public QueryObjectBundle rewriteQuery(@Language("SQL") String query, QueryConfig
244299
FunctionCallRewriter.RewriterResult rewriterResult = functionCallRewriter.get().rewrite(insertQuery);
245300
insertQuery = (Query) rewriterResult.getRewrittenNode();
246301
functionSubstitutions = rewriterResult.getSubstitutions();
247-
248-
// Apply safety wrapper for json_parse() calls
302+
}
303+
// Apply safety wrapper for json_parse() calls
304+
if (jsonParseSafetyWrapperEnabled) {
249305
insertQuery = applyJsonParseSafetyWrapper(insertQuery);
250306
}
251307
if (shouldReuseTable && !functionSubstitutions.isPresent()) {
@@ -291,8 +347,9 @@ public QueryObjectBundle rewriteQuery(@Language("SQL") String query, QueryConfig
291347
FunctionCallRewriter.RewriterResult rewriterResult = functionCallRewriter.get().rewrite(queryBody);
292348
queryBody = (Query) rewriterResult.getRewrittenNode();
293349
functionSubstitutions = rewriterResult.getSubstitutions();
294-
295-
// Apply safety wrapper for json_parse() calls
350+
}
351+
// Apply safety wrapper for json_parse() calls
352+
if (jsonParseSafetyWrapperEnabled) {
296353
queryBody = applyJsonParseSafetyWrapper(queryBody);
297354
}
298355

presto-verifier/src/main/java/com/facebook/presto/verifier/rewrite/VerificationQueryRewriterFactory.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ public class VerificationQueryRewriterFactory
5757
private final boolean testReuseTable;
5858

5959
private final Multimap<String, FunctionCallSubstitute> functionSubstitutes;
60+
private final boolean jsonParseSafetyWrapperEnabled;
6061

6162
@Inject
6263
public VerificationQueryRewriterFactory(
@@ -77,6 +78,7 @@ public VerificationQueryRewriterFactory(
7778
this.controlReuseTable = controlConfig.isReuseTable();
7879
this.testReuseTable = testConfig.isReuseTable();
7980
this.functionSubstitutes = verifierConfig.getFunctionSubstitutes();
81+
this.jsonParseSafetyWrapperEnabled = verifierConfig.isJsonParseSafetyWrapperEnabled();
8082
}
8183

8284
@Override
@@ -90,7 +92,8 @@ public QueryRewriter create(PrestoAction prestoAction)
9092
ImmutableMap.of(CONTROL, controlTablePrefix, TEST, testTablePrefix),
9193
ImmutableMap.of(CONTROL, controlTableProperties, TEST, testTableProperties),
9294
ImmutableMap.of(CONTROL, controlReuseTable, TEST, testReuseTable),
93-
functionSubstitutes);
95+
functionSubstitutes,
96+
jsonParseSafetyWrapperEnabled);
9497
}
9598

9699
private static List<Property> constructProperties(Map<String, Object> propertiesMap)

presto-verifier/src/test/java/com/facebook/presto/verifier/rewrite/TestQueryRewriter.java

Lines changed: 102 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,8 @@ public class TestQueryRewriter
8484
private static final QueryRewriteConfig QUERY_REWRITE_CONFIG = new QueryRewriteConfig()
8585
.setTablePrefix("local.tmp")
8686
.setTableProperties("{\"p_int\": 30, \"p_long\": 4294967297, \"p_double\": 1.5, \"p_varchar\": \"test\", \"p_bool\": true}");
87-
private static final VerifierConfig VERIFIER_CONFIG = new VerifierConfig();
87+
private static final VerifierConfig VERIFIER_CONFIG = new VerifierConfig()
88+
.setJsonParseSafetyWrapperEnabled(true);
8889
private static final SqlParser SQL_PARSER = new SqlParser(new SqlParserOptions().allowIdentifierSymbol(COLON, AT_SIGN));
8990
private static final BlockEncodingSerde blockEncodingSerde = new BlockEncodingManager();
9091

@@ -668,6 +669,106 @@ private static void assertStatements(List<Statement> actual, List<Statement> exp
668669
assertEquals(actualQueries, expectedQueries);
669670
}
670671

672+
@Test
673+
public void testJsonParseSafetyWrapper()
674+
{
675+
QueryRewriter queryRewriter = getQueryRewriter();
676+
677+
// Bare json_parse should be wrapped in TRY
678+
assertCreateTableAs(
679+
queryRewriter.rewriteQuery(
680+
"SELECT json_parse(b) FROM test_table",
681+
CONFIGURATION, CONTROL).getQuery(),
682+
"SELECT TRY(json_parse(b)) FROM test_table");
683+
684+
// Already wrapped in TRY should not be double-wrapped
685+
assertCreateTableAs(
686+
queryRewriter.rewriteQuery(
687+
"SELECT TRY(json_parse(b)) FROM test_table",
688+
CONFIGURATION, CONTROL).getQuery(),
689+
"SELECT TRY(json_parse(b)) FROM test_table");
690+
691+
// Mixed: some wrapped, some not
692+
assertCreateTableAs(
693+
queryRewriter.rewriteQuery(
694+
"SELECT TRY(json_parse(b)), json_parse(b) FROM test_table",
695+
CONFIGURATION, CONTROL).getQuery(),
696+
"SELECT TRY(json_parse(b)), TRY(json_parse(b)) FROM test_table");
697+
698+
// json_parse inside complex expression with lambda
699+
assertCreateTableAs(
700+
queryRewriter.rewriteQuery(
701+
"SELECT transform(ARRAY[b], x -> json_parse(x)) FROM test_table",
702+
CONFIGURATION, CONTROL).getQuery(),
703+
"SELECT transform(ARRAY[b], x -> TRY(json_parse(x))) FROM test_table");
704+
705+
// json_parse in subquery
706+
assertCreateTableAs(
707+
queryRewriter.rewriteQuery(
708+
"SELECT * FROM (SELECT json_parse(b) AS parsed FROM test_table) t",
709+
CONFIGURATION, CONTROL).getQuery(),
710+
"SELECT * FROM (SELECT TRY(json_parse(b)) AS parsed FROM test_table) t");
711+
712+
// json_parse in WHERE clause
713+
assertCreateTableAs(
714+
queryRewriter.rewriteQuery(
715+
"SELECT a FROM test_table WHERE json_parse(b) IS NOT NULL",
716+
CONFIGURATION, CONTROL).getQuery(),
717+
"SELECT a FROM test_table WHERE TRY(json_parse(b)) IS NOT NULL");
718+
719+
// json_parse nested inside json_extract (common pattern from function substitution)
720+
assertCreateTableAs(
721+
queryRewriter.rewriteQuery(
722+
"SELECT json_extract(json_parse(CAST(b AS varchar)), '$.key') FROM test_table",
723+
CONFIGURATION, CONTROL).getQuery(),
724+
"SELECT json_extract(TRY(json_parse(CAST(b AS varchar))), '$.key') FROM test_table");
725+
726+
// No json_parse - should pass through unchanged
727+
assertCreateTableAs(
728+
queryRewriter.rewriteQuery(
729+
"SELECT a, b FROM test_table",
730+
CONFIGURATION, CONTROL).getQuery(),
731+
"SELECT a, b FROM test_table");
732+
733+
// Multiple json_parse in SELECT + WHERE (both should be wrapped independently)
734+
assertCreateTableAs(
735+
queryRewriter.rewriteQuery(
736+
"SELECT json_parse(b) FROM test_table WHERE CAST(json_parse(b) AS VARCHAR) <> ''",
737+
CONFIGURATION, CONTROL).getQuery(),
738+
"SELECT TRY(json_parse(b)) FROM test_table WHERE CAST(TRY(json_parse(b)) AS VARCHAR) <> ''");
739+
740+
// COALESCE(TRY(json_parse(col)), JSON 'null') - json_parse already inside TRY, no wrapping
741+
assertCreateTableAs(
742+
queryRewriter.rewriteQuery(
743+
"SELECT COALESCE(TRY(json_parse(b)), JSON 'null') FROM test_table",
744+
CONFIGURATION, CONTROL).getQuery(),
745+
"SELECT COALESCE(TRY(json_parse(b)), JSON 'null') FROM test_table");
746+
747+
// Many json_parse calls in a single query (stress test for AST approach vs regex)
748+
assertCreateTableAs(
749+
queryRewriter.rewriteQuery(
750+
"SELECT json_parse(b), TRY(json_parse(b)), json_parse(CONCAT(b, '')), json_parse(UPPER(b)), json_parse(LOWER(b)) FROM test_table",
751+
CONFIGURATION, CONTROL).getQuery(),
752+
"SELECT TRY(json_parse(b)), TRY(json_parse(b)), TRY(json_parse(CONCAT(b, ''))), TRY(json_parse(UPPER(b))), TRY(json_parse(LOWER(b))) FROM test_table");
753+
}
754+
755+
@Test
756+
public void testJsonParseSafetyWrapperWithFunctionSubstitutes()
757+
{
758+
// Test that json_parse wrapping works AFTER function substitution
759+
VerifierConfig verifierConfig = new VerifierConfig()
760+
.setJsonParseSafetyWrapperEnabled(true)
761+
.setFunctionSubstitutes("/arbitrary(x)/min(x)/");
762+
QueryRewriter queryRewriter = getQueryRewriter(new QueryRewriteConfig(), verifierConfig);
763+
764+
// Function substitution + json_parse wrapping should both apply
765+
assertCreateTableAs(
766+
queryRewriter.rewriteQuery(
767+
"SELECT json_parse(ARBITRARY(b)) FROM test_table",
768+
CONFIGURATION, CONTROL).getQuery(),
769+
"SELECT TRY(json_parse(MIN(b))) FROM test_table");
770+
}
771+
671772
private QueryRewriter getQueryRewriter()
672773
{
673774
return getQueryRewriter(QUERY_REWRITE_CONFIG, VERIFIER_CONFIG);

0 commit comments

Comments
 (0)