Skip to content

Commit 9626679

Browse files
committed
fix: not throwing early exception in limit/offset extractor of missing order by
1 parent 63694e6 commit 9626679

File tree

8 files changed

+418
-2
lines changed

8 files changed

+418
-2
lines changed

src/adapter/etl-adapter-postgresql/src/Flow/ETL/Adapter/PostgreSql/PostgreSqlLimitOffsetExtractor.php

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
namespace Flow\ETL\Adapter\PostgreSql;
66

77
use function Flow\ETL\DSL\array_to_rows;
8-
use function Flow\PostgreSql\DSL\{sql_to_count_query, sql_to_paginated_query};
8+
use function Flow\PostgreSql\DSL\{sql_parse, sql_query_order_by, sql_to_count_query, sql_to_paginated_query};
99
use Flow\ETL\Exception\InvalidArgumentException;
1010
use Flow\ETL\Extractor\Signal;
1111
use Flow\ETL\{Extractor, FlowContext, Schema};
@@ -30,6 +30,12 @@ public function extract(FlowContext $context) : \Generator
3030
{
3131
$sql = $this->query instanceof SqlQuery ? $this->query->toSql() : $this->query;
3232

33+
if (!sql_query_order_by(sql_parse($sql))->hasOrderBy()) {
34+
throw new InvalidArgumentException(
35+
'LIMIT/OFFSET pagination requires ORDER BY clause for deterministic results'
36+
);
37+
}
38+
3339
$total = $this->maximum ?? $this->countTotal($sql);
3440

3541
if ($total === 0) {

src/adapter/etl-adapter-postgresql/tests/Flow/ETL/Adapter/PostgreSql/Tests/Unit/PostgreSqlLimitOffsetExtractorTest.php

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
namespace Flow\ETL\Adapter\PostgreSql\Tests\Unit;
66

7+
use function Flow\ETL\DSL\flow_context;
78
use Flow\ETL\Adapter\PostgreSql\PostgreSqlLimitOffsetExtractor;
89
use Flow\ETL\Exception\InvalidArgumentException;
910
use Flow\ETL\Schema;
@@ -13,6 +14,24 @@
1314

1415
final class PostgreSqlLimitOffsetExtractorTest extends FlowTestCase
1516
{
17+
protected function setUp() : void
18+
{
19+
if (!\extension_loaded('pg_query')) {
20+
self::markTestSkipped('pg_query extension is not loaded. For local development use `nix-shell --arg with-pg-query-ext true` to enable it in the shell.');
21+
}
22+
}
23+
24+
public function test_throws_exception_when_query_has_no_order_by() : void
25+
{
26+
$client = $this->createClientMock();
27+
$extractor = new PostgreSqlLimitOffsetExtractor($client, 'SELECT * FROM users');
28+
29+
$this->expectException(InvalidArgumentException::class);
30+
$this->expectExceptionMessage('LIMIT/OFFSET pagination requires ORDER BY clause for deterministic results');
31+
32+
\iterator_to_array($extractor->extract(flow_context()));
33+
}
34+
1635
public function test_with_maximum_validates_positive_value() : void
1736
{
1837
$client = $this->createClientMock();
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Flow\PostgreSql\AST\Nodes;
6+
7+
use Flow\PostgreSql\AST\Transformers\SortOrder;
8+
use Flow\PostgreSql\Protobuf\AST\{SortBy, SortByDir};
9+
10+
final readonly class OrderByItem
11+
{
12+
public function __construct(
13+
private SortBy $sortBy,
14+
) {
15+
}
16+
17+
public function column() : ?string
18+
{
19+
$node = $this->sortBy->getNode();
20+
21+
if ($node === null) {
22+
return null;
23+
}
24+
25+
$columnRef = $node->getColumnRef();
26+
27+
if ($columnRef === null) {
28+
return null;
29+
}
30+
31+
$fields = $columnRef->getFields();
32+
33+
if ($fields === null || \count($fields) === 0) {
34+
return null;
35+
}
36+
37+
$fieldCount = \count($fields);
38+
$columnField = $fields[$fieldCount - 1];
39+
$stringNode = $columnField->getString();
40+
41+
if ($stringNode !== null) {
42+
return $stringNode->getSval();
43+
}
44+
45+
return null;
46+
}
47+
48+
public function direction() : SortOrder
49+
{
50+
$dir = $this->sortBy->getSortbyDir();
51+
52+
return match ($dir) {
53+
SortByDir::SORTBY_DESC => SortOrder::DESC,
54+
default => SortOrder::ASC,
55+
};
56+
}
57+
58+
public function raw() : SortBy
59+
{
60+
return $this->sortBy;
61+
}
62+
}
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Flow\PostgreSql\AST\Visitors;
6+
7+
use Flow\PostgreSql\AST\NodeVisitor;
8+
use Flow\PostgreSql\Protobuf\AST\SortBy;
9+
10+
/**
11+
* A visitor that collects all SortBy (ORDER BY items) nodes.
12+
*/
13+
final class SortByCollector implements NodeVisitor
14+
{
15+
/**
16+
* @var array<SortBy>
17+
*/
18+
private array $sortByClauses = [];
19+
20+
public static function nodeClass() : string
21+
{
22+
return SortBy::class;
23+
}
24+
25+
public function enter(object $node) : ?int
26+
{
27+
/** @var SortBy $node */
28+
$this->sortByClauses[] = $node;
29+
30+
return null;
31+
}
32+
33+
/**
34+
* @return array<SortBy>
35+
*/
36+
public function getSortByClauses() : array
37+
{
38+
return $this->sortByClauses;
39+
}
40+
41+
public function hasSortBy() : bool
42+
{
43+
return \count($this->sortByClauses) > 0;
44+
}
45+
46+
public function leave(object $node) : ?int
47+
{
48+
return null;
49+
}
50+
51+
public function reset() : void
52+
{
53+
$this->sortByClauses = [];
54+
}
55+
}

src/lib/postgresql/src/Flow/PostgreSql/DSL/functions.php

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
use Flow\PostgreSql\Explain\Analyzer\PlanAnalyzer;
1818
use Flow\PostgreSql\Explain\ExplainParser;
1919
use Flow\PostgreSql\Explain\Plan\Plan;
20-
use Flow\PostgreSql\Extractors\{Columns, Functions, QueryDepth, Tables};
20+
use Flow\PostgreSql\Extractors\{Columns, Functions, OrderBy as OrderByExtractor, QueryDepth, Tables};
2121
use Flow\PostgreSql\Protobuf\AST\Node;
2222
use Flow\PostgreSql\QueryBuilder\Clause\{
2323
CTE,
@@ -370,6 +370,15 @@ function sql_query_functions(ParsedQuery $query) : Functions
370370
return new Functions($query);
371371
}
372372

373+
/**
374+
* Extract ORDER BY clauses from a parsed SQL query.
375+
*/
376+
#[DocumentationDSL(module: Module::PG_QUERY, type: DSLType::HELPER)]
377+
function sql_query_order_by(ParsedQuery $query) : OrderByExtractor
378+
{
379+
return new OrderByExtractor($query);
380+
}
381+
373382
/**
374383
* Get the maximum nesting depth of a SQL query.
375384
*
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Flow\PostgreSql\Extractors;
6+
7+
use Flow\PostgreSql\AST\Nodes\OrderByItem;
8+
use Flow\PostgreSql\AST\Visitors\SortByCollector;
9+
use Flow\PostgreSql\ParsedQuery;
10+
11+
final readonly class OrderBy
12+
{
13+
public function __construct(private ParsedQuery $query)
14+
{
15+
}
16+
17+
/**
18+
* @return array<OrderByItem>
19+
*/
20+
public function all() : array
21+
{
22+
$collector = new SortByCollector();
23+
$this->query->traverse($collector);
24+
25+
return \array_map(
26+
static fn ($sortBy) => new OrderByItem($sortBy),
27+
$collector->getSortByClauses()
28+
);
29+
}
30+
31+
public function hasOrderBy() : bool
32+
{
33+
$collector = new SortByCollector();
34+
$this->query->traverse($collector);
35+
36+
return $collector->hasSortBy();
37+
}
38+
}
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Flow\PostgreSql\Tests\Unit\AST\Visitors;
6+
7+
use Flow\PostgreSql\AST\Traverser;
8+
use Flow\PostgreSql\AST\Visitors\SortByCollector;
9+
use Flow\PostgreSql\Protobuf\AST\{ParseResult, SortBy};
10+
use PHPUnit\Framework\TestCase;
11+
12+
final class SortByCollectorTest extends TestCase
13+
{
14+
protected function setUp() : void
15+
{
16+
if (!\extension_loaded('pg_query')) {
17+
self::markTestSkipped('pg_query extension is not loaded. For local development use `nix-shell --arg with-pg-query-ext true` to enable it in the shell.');
18+
}
19+
}
20+
21+
public function test_collects_multiple_sort_by_columns() : void
22+
{
23+
$collector = new SortByCollector();
24+
$traverser = new Traverser($collector);
25+
$traverser->traverse($this->parseQuery('SELECT id FROM users ORDER BY name, created_at'));
26+
27+
self::assertCount(2, $collector->getSortByClauses());
28+
}
29+
30+
public function test_collects_sort_by_from_order_by_clause() : void
31+
{
32+
$collector = new SortByCollector();
33+
$traverser = new Traverser($collector);
34+
$traverser->traverse($this->parseQuery('SELECT id FROM users ORDER BY name'));
35+
36+
self::assertCount(1, $collector->getSortByClauses());
37+
}
38+
39+
public function test_collects_sort_by_from_subquery() : void
40+
{
41+
$collector = new SortByCollector();
42+
$traverser = new Traverser($collector);
43+
$traverser->traverse($this->parseQuery('SELECT * FROM (SELECT id, name FROM users ORDER BY id) sub'));
44+
45+
self::assertCount(1, $collector->getSortByClauses());
46+
}
47+
48+
public function test_collects_sort_by_with_direction() : void
49+
{
50+
$collector = new SortByCollector();
51+
$traverser = new Traverser($collector);
52+
$traverser->traverse($this->parseQuery('SELECT id FROM users ORDER BY name ASC, created_at DESC'));
53+
54+
self::assertCount(2, $collector->getSortByClauses());
55+
}
56+
57+
public function test_enter_returns_null() : void
58+
{
59+
$collector = new SortByCollector();
60+
$sortBy = new SortBy();
61+
62+
self::assertNull($collector->enter($sortBy));
63+
}
64+
65+
public function test_get_sort_by_clauses_returns_empty_array_initially() : void
66+
{
67+
$collector = new SortByCollector();
68+
69+
self::assertSame([], $collector->getSortByClauses());
70+
}
71+
72+
public function test_has_sort_by_returns_false_when_no_order_by() : void
73+
{
74+
$collector = new SortByCollector();
75+
$traverser = new Traverser($collector);
76+
$traverser->traverse($this->parseQuery('SELECT * FROM users'));
77+
78+
self::assertFalse($collector->hasSortBy());
79+
}
80+
81+
public function test_has_sort_by_returns_true_when_order_by_exists() : void
82+
{
83+
$collector = new SortByCollector();
84+
$traverser = new Traverser($collector);
85+
$traverser->traverse($this->parseQuery('SELECT id FROM users ORDER BY name'));
86+
87+
self::assertTrue($collector->hasSortBy());
88+
}
89+
90+
public function test_leave_returns_null() : void
91+
{
92+
$collector = new SortByCollector();
93+
$sortBy = new SortBy();
94+
95+
self::assertNull($collector->leave($sortBy));
96+
}
97+
98+
public function test_node_class_returns_sort_by_class() : void
99+
{
100+
self::assertSame(SortBy::class, SortByCollector::nodeClass());
101+
}
102+
103+
public function test_reset_clears_collected_sort_by_clauses() : void
104+
{
105+
$collector = new SortByCollector();
106+
$traverser = new Traverser($collector);
107+
$traverser->traverse($this->parseQuery('SELECT id FROM users ORDER BY name, created_at'));
108+
109+
self::assertCount(2, $collector->getSortByClauses());
110+
111+
$collector->reset();
112+
113+
self::assertSame([], $collector->getSortByClauses());
114+
}
115+
116+
public function test_returns_empty_for_query_without_order_by() : void
117+
{
118+
$collector = new SortByCollector();
119+
$traverser = new Traverser($collector);
120+
$traverser->traverse($this->parseQuery('SELECT * FROM users'));
121+
122+
self::assertSame([], $collector->getSortByClauses());
123+
}
124+
125+
private function parseQuery(string $sql) : ParseResult
126+
{
127+
/** @var string $json */
128+
$json = \pg_query_parse($sql);
129+
$result = new ParseResult();
130+
$result->mergeFromJsonString($json);
131+
132+
return $result;
133+
}
134+
}

0 commit comments

Comments
 (0)