Skip to content

Commit c195796

Browse files
peterenescufacebook-github-bot
authored andcommitted
fix: Fix SQL building for dereference queries (facebookincubator#12989)
Summary: Pull Request resolved: facebookincubator#12989 Currently the Presto expression fuzzer is broken for dereference queries and we have subsequently removed the flag `--enable_dereference` from continuous runs to stabilize the fuzzer. This issue comes from how test Presto SQL is constructed, namely two particular issues: 1. How field names are attached to their parent 2. How rows with particular field names are constructed (and then dereferenced) --- For the former (point #1), the fuzzer is attempting to execute the following expression: ``` if("c0","c1")["row_field0"] ``` This expression aims to access `row_field0` of row `c1` if `c0` is true. Unfortunately the SQL isn't properly constructed and fails because `row_field0` is undefined: ``` Execute presto sql: SELECT row_field0 as p0, row_number as p1 FROM (t_values) ``` This review updates the logic in both PrestoSql (toCallInputsSql function) and the visit plan node function of the PrestoQuery runner to add the missing SQL. --- For the latter (point #2), the fuzzer attempts to build a row and immediately access a particular field, as an example: ``` CONCAT(c0)[row_field0] ``` or, in essence ``` row(c0).row_field0 ``` The problem is two-fold. Firstly, the default field name is of the form `field0`, `field1`, etc., so we will need to update the field name. Secondly, the above is not possible in Presto; it will fail: ``` ROW('hello', 5).field0 ``` The way to make the above work is to cast and explicitly define the field names and types: ``` select cast(row('hello', 15) as row(field0 varchar, field1 integer)).field0 ``` This PR adds this functionality (see test plan for example). Reviewed By: kagamiori Differential Revision: D72755054 fbshipit-source-id: d619eeec0c0f320c476c0056b9e00d16de3c1485
1 parent 7479a82 commit c195796

File tree

7 files changed

+126
-7
lines changed

7 files changed

+126
-7
lines changed

velox/exec/fuzzer/PrestoQueryRunner.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,12 @@ class PrestoQueryRunnerToSqlPlanNodeVisitor : public PrestoSqlPlanNodeVisitor {
231231
if (auto field =
232232
std::dynamic_pointer_cast<const core::FieldAccessTypedExpr>(
233233
projection)) {
234-
sql << field->name();
234+
if (field->isInputColumn()) {
235+
sql << field->name();
236+
} else {
237+
toCallInputsSql(field->inputs(), sql);
238+
sql << fmt::format(".{}", field->name());
239+
}
235240
} else if (
236241
auto call = std::dynamic_pointer_cast<const core::CallTypedExpr>(
237242
projection)) {

velox/exec/fuzzer/PrestoSql.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,12 @@ void toCallInputsSql(
133133
if (auto field =
134134
std::dynamic_pointer_cast<const core::FieldAccessTypedExpr>(
135135
input)) {
136-
sql << field->name();
136+
if (field->isInputColumn()) {
137+
sql << field->name();
138+
} else {
139+
toCallInputsSql(field->inputs(), sql);
140+
sql << fmt::format(".{}", field->name());
141+
}
137142
} else if (
138143
auto call =
139144
std::dynamic_pointer_cast<const core::CallTypedExpr>(input)) {
@@ -325,11 +330,10 @@ std::string toCastSql(const core::CastTypedExpr& cast) {
325330
}
326331

327332
std::string toConcatSql(const core::ConcatTypedExpr& concat) {
328-
std::stringstream sql;
329-
sql << "row(";
330-
toCallInputsSql(concat.inputs(), sql);
331-
sql << ")";
332-
return sql.str();
333+
std::stringstream input;
334+
toCallInputsSql(concat.inputs(), input);
335+
return fmt::format(
336+
"cast(row({}) as {})", input.str(), toTypeSql(concat.type()));
333337
}
334338

335339
template <typename T>

velox/exec/fuzzer/tests/PrestoSqlTest.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,5 +323,28 @@ TEST(PrestoSqlTest, toCallSql) {
323323
"infinity()");
324324
}
325325

326+
TEST(PrestoSqlTest, toConcatSql) {
327+
auto expression = core::ConcatTypedExpr(
328+
{"field0", "field1"},
329+
std::vector<core::TypedExprPtr>{
330+
std::make_shared<core::ConstantTypedExpr>(VARCHAR(), "a"),
331+
std::make_shared<core::ConstantTypedExpr>(VARCHAR(), "b")});
332+
333+
EXPECT_EQ(
334+
toConcatSql(expression),
335+
"cast(row('a', 'b') as ROW(field0 VARCHAR, field1 VARCHAR))");
336+
}
337+
338+
TEST(PrestoSqlTest, toCallInputsSql) {
339+
std::stringstream sql;
340+
auto expression = std::make_shared<core::FieldAccessTypedExpr>(
341+
VARCHAR(),
342+
std::make_shared<core::FieldAccessTypedExpr>(VARCHAR(), "c0"),
343+
"field0");
344+
345+
toCallInputsSql({expression}, sql);
346+
EXPECT_EQ(sql.str(), "c0.field0");
347+
}
348+
326349
} // namespace
327350
} // namespace facebook::velox::exec::test

velox/exec/tests/PlanBuilderTest.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,12 @@
1515
*/
1616
#include "velox/exec/tests/utils/PlanBuilder.h"
1717
#include "velox/common/base/tests/GTestUtils.h"
18+
#include "velox/core/Expressions.h"
1819
#include "velox/exec/WindowFunction.h"
1920
#include "velox/functions/prestosql/aggregates/RegisterAggregateFunctions.h"
2021
#include "velox/functions/prestosql/registration/RegistrationFunctions.h"
22+
#include "velox/parse/Expressions.h"
23+
#include "velox/parse/IExpr.h"
2124
#include "velox/parse/TypeResolver.h"
2225
#include "velox/vector/tests/utils/VectorTestBase.h"
2326

@@ -250,4 +253,52 @@ TEST_F(PlanBuilderTest, missingOutputType) {
250253
"outputType must be specified");
251254
}
252255

256+
TEST_F(PlanBuilderTest, projectExpressions) {
257+
// Non-typed Expressions.
258+
// Simple field access.
259+
auto data = ROW({"c0"}, {BIGINT()});
260+
VELOX_CHECK_EQ(
261+
PlanBuilder()
262+
.tableScan("tmp", data)
263+
.projectExpressions(
264+
{std::make_shared<core::FieldAccessExpr>("c0", std::nullopt)})
265+
.planNode()
266+
->toString(true, false),
267+
"-- Project[1][expressions: (c0:BIGINT, ROW[\"c0\"])] -> c0:BIGINT\n");
268+
// Dereference test using field access query.
269+
data = ROW({"c0"}, {ROW({"field0"}, {BIGINT()})});
270+
VELOX_CHECK_EQ(
271+
PlanBuilder()
272+
.tableScan("tmp", data)
273+
.projectExpressions({std::make_shared<core::FieldAccessExpr>(
274+
"field0",
275+
std::nullopt,
276+
std::vector<core::ExprPtr>{
277+
std::make_shared<core::FieldAccessExpr>(
278+
"c0", std::nullopt)})})
279+
.planNode()
280+
->toString(true, false),
281+
"-- Project[1][expressions: (field0:BIGINT, ROW[\"c0\"][field0])] -> field0:BIGINT\n");
282+
283+
// Test Typed Expressions
284+
VELOX_CHECK_EQ(
285+
PlanBuilder()
286+
.tableScan("tmp", ROW({"c0"}, {ROW({VARCHAR()})}))
287+
.projectExpressions(
288+
{std::make_shared<core::FieldAccessTypedExpr>(VARCHAR(), "c0")})
289+
.planNode()
290+
->toString(true, false),
291+
"-- Project[1][expressions: (p0:VARCHAR, \"c0\")] -> p0:VARCHAR\n");
292+
VELOX_CHECK_EQ(
293+
PlanBuilder()
294+
.tableScan("tmp", ROW({"c0"}, {ROW({VARCHAR()})}))
295+
.projectExpressions({std::make_shared<core::FieldAccessTypedExpr>(
296+
VARCHAR(),
297+
std::make_shared<core::FieldAccessTypedExpr>(VARCHAR(), "c0"),
298+
"field0")})
299+
.planNode()
300+
->toString(true, false),
301+
"-- Project[1][expressions: (p0:VARCHAR, \"c0\"[\"field0\"])] -> p0:VARCHAR\n");
302+
}
303+
253304
} // namespace facebook::velox::exec::test

velox/exec/tests/PrestoQueryRunnerTest.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,19 @@ TEST_F(PrestoQueryRunnerTest, toSql) {
254254
queryRunner->toSql(plan),
255255
"SELECT avg(c0) filter (where c2) as a0, avg(c1) as a1 FROM tmp");
256256
}
257+
258+
// Test dereference queries.
259+
{
260+
auto plan =
261+
PlanBuilder()
262+
.tableScan("tmp", ROW({"c0"}, {ROW({"field0"}, {BIGINT()})}))
263+
.projectExpressions({std::make_shared<core::FieldAccessTypedExpr>(
264+
VARCHAR(),
265+
std::make_shared<core::FieldAccessTypedExpr>(VARCHAR(), "c0"),
266+
"field0")})
267+
.planNode();
268+
EXPECT_EQ(queryRunner->toSql(plan), "SELECT c0.field0 as p0 FROM (tmp)");
269+
}
257270
}
258271

259272
TEST_F(PrestoQueryRunnerTest, toSqlJoins) {

velox/exec/tests/utils/PlanBuilder.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,27 @@ PlanBuilder& PlanBuilder::projectExpressions(
419419
return *this;
420420
}
421421

422+
PlanBuilder& PlanBuilder::projectExpressions(
423+
const std::vector<std::shared_ptr<const core::ITypedExpr>>& projections) {
424+
std::vector<core::TypedExprPtr> expressions;
425+
std::vector<std::string> projectNames;
426+
for (auto i = 0; i < projections.size(); ++i) {
427+
expressions.push_back(projections[i]);
428+
if (auto fieldExpr =
429+
dynamic_cast<const core::FieldAccessExpr*>(projections[i].get())) {
430+
projectNames.push_back(fieldExpr->getFieldName());
431+
} else {
432+
projectNames.push_back(fmt::format("p{}", i));
433+
}
434+
}
435+
planNode_ = std::make_shared<core::ProjectNode>(
436+
nextPlanNodeId(),
437+
std::move(projectNames),
438+
std::move(expressions),
439+
planNode_);
440+
return *this;
441+
}
442+
422443
PlanBuilder& PlanBuilder::project(const std::vector<std::string>& projections) {
423444
VELOX_CHECK_NOT_NULL(planNode_, "Project cannot be the source node");
424445
std::vector<std::shared_ptr<const core::IExpr>> expressions;

velox/exec/tests/utils/PlanBuilder.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -552,6 +552,8 @@ class PlanBuilder {
552552
/// the type.
553553
PlanBuilder& projectExpressions(
554554
const std::vector<core::ExprPtr>& projections);
555+
PlanBuilder& projectExpressions(
556+
const std::vector<core::TypedExprPtr>& projections);
555557

556558
/// Similar to project() except 'optionalProjections' could be empty and the
557559
/// function will skip creating a ProjectNode in that case.

0 commit comments

Comments
 (0)