Skip to content

Commit 6542d44

Browse files
committed
feature: flow postgresql adapter - extractors
1 parent 47aeef5 commit 6542d44

File tree

27 files changed

+1434
-34
lines changed

27 files changed

+1434
-34
lines changed

.github/workflows/monorepo-split.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ jobs:
7373
split_repository: 'etl-adapter-text'
7474
- local_path: 'src/adapter/etl-adapter-xml'
7575
split_repository: 'etl-adapter-xml'
76+
- local_path: 'src/adapter/etl-adapter-postgresql'
77+
split_repository: 'etl-adapter-postgresql'
7678

7779
- local_path: 'src/bridge/filesystem/azure'
7880
split_repository: 'filesystem-azure-bridge'

composer.json

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@
8282
"flow-php/etl-adapter-parquet": "self.version",
8383
"flow-php/etl-adapter-text": "self.version",
8484
"flow-php/etl-adapter-xml": "self.version",
85+
"flow-php/etl-adapter-postgresql": "self.version",
8586
"flow-php/filesystem": "self.version",
8687
"flow-php/filesystem-async-aws-bridge": "self.version",
8788
"flow-php/filesystem-azure-bridge": "self.version",
@@ -115,6 +116,7 @@
115116
"src/adapter/etl-adapter-parquet/src/Flow",
116117
"src/adapter/etl-adapter-text/src/Flow",
117118
"src/adapter/etl-adapter-xml/src/Flow",
119+
"src/adapter/etl-adapter-postgresql/src/Flow",
118120
"src/bridge/filesystem/async-aws/src/Flow",
119121
"src/bridge/filesystem/azure/src/Flow",
120122
"src/bridge/monolog/http/src/Flow",
@@ -155,6 +157,7 @@
155157
"src/adapter/etl-adapter-parquet/src/Flow/ETL/Adapter/Parquet/functions.php",
156158
"src/adapter/etl-adapter-text/src/Flow/ETL/Adapter/Text/functions.php",
157159
"src/adapter/etl-adapter-xml/src/Flow/ETL/Adapter/XML/functions.php",
160+
"src/adapter/etl-adapter-postgresql/src/Flow/ETL/Adapter/PostgreSql/functions.php",
158161
"src/bridge/filesystem/async-aws/src/Flow/Filesystem/Bridge/AsyncAWS/DSL/functions.php",
159162
"src/bridge/filesystem/azure/src/Flow/Filesystem/Bridge/Azure/DSL/functions.php",
160163
"src/bridge/monolog/http/src/Flow/Bridge/Monolog/Http/DSL/functions.php",
@@ -191,6 +194,7 @@
191194
"src/adapter/etl-adapter-parquet/tests/Flow",
192195
"src/adapter/etl-adapter-text/tests/Flow",
193196
"src/adapter/etl-adapter-xml/tests/Flow",
197+
"src/adapter/etl-adapter-postgresql/tests/Flow",
194198
"src/bridge/filesystem/async-aws/tests/Flow",
195199
"src/bridge/filesystem/azure/tests/Flow",
196200
"src/bridge/monolog/http/tests/Flow",
@@ -276,7 +280,8 @@
276280
"@test:adapter:meilisearch",
277281
"@test:adapter:parquet",
278282
"@test:adapter:text",
279-
"@test:adapter:xml"
283+
"@test:adapter:xml",
284+
"@test:adapter:postgresql"
280285
],
281286
"test:core": [
282287
"tools/phpunit/vendor/bin/phpunit --testsuite=etl-unit --log-junit ./var/phpunit/logs/etl-unit.junit.xml --coverage-clover=./var/phpunit/coverage/clover/etl-unit.coverage.xml --coverage-html=./var/phpunit/coverage/html/etl-unit",
@@ -383,6 +388,10 @@
383388
"tools/phpunit/vendor/bin/phpunit --testsuite=adapter-xml-unit --log-junit ./var/phpunit/logs/adapter-xml-unit.junit.xml --coverage-clover=./var/phpunit/coverage/clover/adapter-xml-unit.coverage.xml --coverage-html=./var/phpunit/coverage/html/adapter-xml-unit",
384389
"tools/phpunit/vendor/bin/phpunit --testsuite=adapter-xml-integration --log-junit ./var/phpunit/coverage/clover/adapter-xml-integration.junit.xml --coverage-clover=./var/phpunit/coverage/clover/adapter-xml-integration.coverage.xml --coverage-html=./var/phpunit/coverage/html/adapter-xml-integration"
385390
],
391+
"test:adapter:postgresql": [
392+
"tools/phpunit/vendor/bin/phpunit --testsuite=adapter-postgresql-unit --log-junit ./var/phpunit/logs/adapter-postgresql-unit.junit.xml --coverage-clover=./var/phpunit/coverage/clover/adapter-postgresql-unit.coverage.xml --coverage-html=./var/phpunit/coverage/html/adapter-postgresql-unit",
393+
"tools/phpunit/vendor/bin/phpunit --testsuite=adapter-postgresql-integration --log-junit ./var/phpunit/logs/adapter-postgresql-integration.junit.xml --coverage-clover=./var/phpunit/coverage/clover/adapter-postgresql-integration.coverage.xml --coverage-html=./var/phpunit/coverage/html/adapter-postgresql-integration"
394+
],
386395
"test:docs": [
387396
"docker run -t --rm -v $PWD:/app norberttech/md-link-linter --exclude=vendor --exclude=.scratchpad --exclude=examples --exclude=documentation ."
388397
],
@@ -481,6 +490,7 @@
481490
"./tools/phpdocumentor/vendor/bin/phpdoc --config=./phpdoc/adapter.parquet.xml",
482491
"./tools/phpdocumentor/vendor/bin/phpdoc --config=./phpdoc/adapter.text.xml",
483492
"./tools/phpdocumentor/vendor/bin/phpdoc --config=./phpdoc/adapter.xml.xml",
493+
"./tools/phpdocumentor/vendor/bin/phpdoc --config=./phpdoc/adapter.postgresql.xml",
484494
"./tools/phpdocumentor/vendor/bin/phpdoc --config=./phpdoc/bridge.filesystem.async-aws.xml",
485495
"./tools/phpdocumentor/vendor/bin/phpdoc --config=./phpdoc/bridge.filesystem.azure.xml",
486496
"./tools/phpdocumentor/vendor/bin/phpdoc --config=./phpdoc/bridge.monolog.http.xml",
Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
# ETL Adapter: PostgreSQL
2+
3+
- [⬅️ Back](/documentation/introduction.md)
4+
- [📦Packagist](https://packagist.org/packages/flow-php/etl-adapter-postgresql)
5+
- [🐙GitHub](https://github.com/flow-php/etl-adapter-postgresql)
6+
- [📚API Reference](/documentation/api/adapter/postgresql)
7+
- [📁Files](/documentation/api/adapter/postgresql/indices/files.html)
8+
9+
[TOC]
10+
11+
Flow PHP's Adapter PostgreSQL is designed to seamlessly integrate PostgreSQL into your ETL (Extract, Transform, Load) workflows. This adapter is built on top of the [PostgreSQL library](/documentation/components/libs/postgresql/client-connection.md), providing efficient data extraction capabilities with built-in pagination support. By building on that library, the adapter gives developers robust features for precise database interaction, simplifying complex data transformations and enhancing data processing efficiency.
12+
13+
## Installation
14+
15+
```
16+
composer require flow-php/etl-adapter-postgresql:~--FLOW_PHP_VERSION--
17+
```
18+
19+
## Requirements
20+
21+
- PHP 8.3+
22+
- ext-pgsql
23+
- ext-pg_query (optional, for query builder support)
24+
25+
## Description
26+
27+
This adapter provides two extraction strategies optimized for different use cases:
28+
29+
- **LIMIT/OFFSET Pagination**: Simple pagination suitable for smaller datasets
30+
- **Keyset (Cursor) Pagination**: Efficient pagination for large datasets with consistent performance
31+
32+
Both extractors support:
33+
- Raw SQL strings or Query Builder objects
34+
- Configurable page sizes
35+
- Maximum row limits
36+
- Custom schema definitions
37+
38+
## Extractor - LIMIT/OFFSET Pagination
39+
40+
The `from_pgsql_limit_offset` extractor uses traditional LIMIT/OFFSET pagination. It is simple to use, but performance may degrade on very large datasets as the offset grows.
41+
42+
### Basic Usage
43+
44+
```php
45+
use function Flow\ETL\Adapter\PostgreSql\from_pgsql_limit_offset;
46+
use function Flow\PostgreSql\DSL\{pgsql_client, pgsql_connection_dsn};
47+
48+
$client = pgsql_client(pgsql_connection_dsn('pgsql://user:pass@localhost:5432/database'));
49+
50+
data_frame()
51+
->read(from_pgsql_limit_offset(
52+
$client,
53+
"SELECT id, name, email FROM users ORDER BY id",
54+
pageSize: 1000
55+
))
56+
->write(to_output())
57+
->run();
58+
```
59+
60+
### With Query Builder
61+
62+
```php
63+
use function Flow\ETL\Adapter\PostgreSql\from_pgsql_limit_offset;
64+
use function Flow\PostgreSql\DSL\{asc, col, select, table};
65+
66+
data_frame()
67+
->read(from_pgsql_limit_offset(
68+
$client,
69+
select(col('id'), col('name'), col('email'))
70+
->from(table('users'))
71+
->orderBy(asc(col('id'))),
72+
pageSize: 500
73+
))
74+
->write(to_output())
75+
->run();
76+
```
77+
78+
### With Maximum Row Limit
79+
80+
```php
81+
use function Flow\ETL\Adapter\PostgreSql\from_pgsql_limit_offset;
82+
83+
data_frame()
84+
->read(from_pgsql_limit_offset(
85+
$client,
86+
"SELECT * FROM large_table ORDER BY id",
87+
pageSize: 1000,
88+
maximum: 10000 // Only extract first 10,000 rows
89+
))
90+
->write(to_output())
91+
->run();
92+
```
93+
94+
## Extractor - Keyset (Cursor) Pagination
95+
96+
The `from_pgsql_key_set` extractor uses keyset pagination (also known as cursor-based pagination). This provides consistent performance regardless of how deep you paginate, making it ideal for large datasets.
97+
98+
### Basic Usage
99+
100+
```php
101+
use function Flow\ETL\Adapter\PostgreSql\{from_pgsql_key_set, pgsql_pagination_key_asc, pgsql_pagination_key_set};
102+
103+
data_frame()
104+
->read(from_pgsql_key_set(
105+
$client,
106+
"SELECT id, name, email FROM users",
107+
pgsql_pagination_key_set(pgsql_pagination_key_asc('id')),
108+
pageSize: 1000
109+
))
110+
->write(to_output())
111+
->run();
112+
```
113+
114+
### Descending Order
115+
116+
```php
117+
use function Flow\ETL\Adapter\PostgreSql\{from_pgsql_key_set, pgsql_pagination_key_desc, pgsql_pagination_key_set};
118+
119+
data_frame()
120+
->read(from_pgsql_key_set(
121+
$client,
122+
"SELECT id, name, created_at FROM orders",
123+
pgsql_pagination_key_set(pgsql_pagination_key_desc('id')), // Newest first
124+
pageSize: 500
125+
))
126+
->write(to_output())
127+
->run();
128+
```
129+
130+
### Composite Keys
131+
132+
For tables with composite ordering, you can specify multiple keys:
133+
134+
```php
135+
use function Flow\ETL\Adapter\PostgreSql\{from_pgsql_key_set, pgsql_pagination_key_asc, pgsql_pagination_key_desc, pgsql_pagination_key_set};
136+
137+
data_frame()
138+
->read(from_pgsql_key_set(
139+
$client,
140+
"SELECT * FROM events",
141+
pgsql_pagination_key_set(
142+
pgsql_pagination_key_desc('created_at'), // First by date descending
143+
pgsql_pagination_key_asc('id') // Then by ID ascending
144+
),
145+
pageSize: 1000
146+
))
147+
->write(to_output())
148+
->run();
149+
```
150+
151+
### With Query Builder
152+
153+
```php
154+
use function Flow\ETL\Adapter\PostgreSql\{from_pgsql_key_set, pgsql_pagination_key_asc, pgsql_pagination_key_set};
155+
use function Flow\PostgreSql\DSL\{col, select, star, table};
156+
157+
data_frame()
158+
->read(from_pgsql_key_set(
159+
$client,
160+
select(star())->from(table('products')),
161+
pgsql_pagination_key_set(pgsql_pagination_key_asc('product_id')),
162+
pageSize: 500
163+
))
164+
->write(to_output())
165+
->run();
166+
```
167+
168+
## DSL Functions Reference
169+
170+
### Extractor Functions
171+
172+
| Function | Description |
173+
|----------|-------------|
174+
| `from_pgsql_limit_offset($client, $query, $pageSize, $maximum)` | Extract using LIMIT/OFFSET pagination |
175+
| `from_pgsql_key_set($client, $query, $keySet, $pageSize, $maximum)` | Extract using keyset pagination |
176+
177+
### Key Functions
178+
179+
| Function | Description |
180+
|----------|-------------|
181+
| `pgsql_pagination_key_asc($column)` | Create an ascending key for keyset pagination |
182+
| `pgsql_pagination_key_desc($column)` | Create a descending key for keyset pagination |
183+
| `pgsql_pagination_key_set(...$keys)` | Create a keyset from one or more keys |
184+
185+
## Choosing Between Extractors
186+
187+
### Use LIMIT/OFFSET when:
188+
- Working with smaller datasets (< 100k rows)
189+
- You need simple, straightforward pagination
190+
- Random page access is required
191+
- The offset values remain relatively small
192+
193+
### Use Keyset Pagination when:
194+
- Working with large datasets (100k+ rows)
195+
- Performance consistency is critical
196+
- You're doing sequential/forward pagination
197+
- Your table has suitable indexed columns for the keyset
198+
199+
## Performance Considerations
200+
201+
### LIMIT/OFFSET
202+
203+
- Simple to understand and implement
204+
- Performance degrades as offset increases (PostgreSQL must skip all previous rows)
205+
- Memory usage increases with larger offsets
206+
207+
### Keyset Pagination
208+
209+
- Consistent per-page performance regardless of position (each page is an index seek rather than a scan over all previously skipped rows)
210+
- Requires indexed columns in the keyset
211+
- Cannot jump to arbitrary pages (sequential access only)
212+
- Handles concurrent modifications more gracefully (rows inserted or deleted during extraction do not shift the boundaries of subsequent pages)

documentation/components/extensions/pg-query-ext.md

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66

77
A compiled PHP extension for PostgreSQL query parsing using [libpg_query](https://github.com/pganalyze/libpg_query).
88

9-
This extension provides low-level functions for parsing PostgreSQL SQL queries. For a higher-level, object-oriented interface with strongly-typed AST nodes, see the [pg-query library](/documentation/components/libs/pg-query.md).
9+
This extension provides low-level functions for parsing PostgreSQL SQL queries. For a higher-level, object-oriented
10+
interface with strongly-typed AST nodes, see the [pg-query library](/documentation/components/libs/pg-query.md).
1011

1112
## Features
1213

@@ -41,7 +42,8 @@ pie install flow-php/pg-query-ext
4142
pie install flow-php/pg-query-ext --with-pg-version=16
4243
```
4344

44-
The extension will automatically download and build the appropriate libpg_query version. Build dependencies (`protobuf-c`, `git`, `make`, `gcc`) must be available on your system.
45+
The extension will automatically download and build the appropriate libpg_query version. Build dependencies
46+
(`protobuf-c`, `git`, `make`, `gcc`) must be available on your system.
4547

4648
### Supported PostgreSQL Versions
4749

@@ -56,7 +58,7 @@ The extension will automatically download and build the appropriate libpg_query
5658
### In php.ini
5759

5860
```ini
59-
extension=pg_query
61+
extension = pg_query
6062
```
6163

6264
### During Development
@@ -129,19 +131,19 @@ $summary = pg_query_summary('SELECT * FROM users WHERE id = 1');
129131

130132
## Functions Reference
131133

132-
| Function | Description | Returns |
133-
|----------|-------------|---------|
134-
| `pg_query_parse(string $sql)` | Parse SQL to JSON AST | `string` (JSON) |
135-
| `pg_query_parse_protobuf(string $sql)` | Parse SQL to protobuf AST | `string` (protobuf) |
136-
| `pg_query_fingerprint(string $sql)` | Generate query fingerprint | `string\|false` |
137-
| `pg_query_normalize(string $sql)` | Normalize query with placeholders | `string\|false` |
138-
| `pg_query_normalize_utility(string $sql)` | Normalize DDL/utility statements | `string\|false` |
139-
| `pg_query_parse_plpgsql(string $sql)` | Parse PL/pgSQL function | `string` (JSON) |
140-
| `pg_query_split(string $sql)` | Split multiple statements | `array<string>` |
141-
| `pg_query_scan(string $sql)` | Scan SQL into tokens | `string` (protobuf) |
142-
| `pg_query_deparse(string $protobuf)` | Convert protobuf AST back to SQL | `string` |
143-
| `pg_query_deparse_opts(...)` | Deparse with formatting options | `string` |
144-
| `pg_query_summary(string $sql, int $options, int $truncate)` | Generate query summary | `string` (protobuf) |
134+
| Function | Description | Returns |
135+
|--------------------------------------------------------------|-----------------------------------|---------------------|
136+
| `pg_query_parse(string $sql)` | Parse SQL to JSON AST | `string` (JSON) |
137+
| `pg_query_parse_protobuf(string $sql)` | Parse SQL to protobuf AST | `string` (protobuf) |
138+
| `pg_query_fingerprint(string $sql)` | Generate query fingerprint | `string\|false` |
139+
| `pg_query_normalize(string $sql)` | Normalize query with placeholders | `string\|false` |
140+
| `pg_query_normalize_utility(string $sql)` | Normalize DDL/utility statements | `string\|false` |
141+
| `pg_query_parse_plpgsql(string $sql)` | Parse PL/pgSQL function | `string` (JSON) |
142+
| `pg_query_split(string $sql)` | Split multiple statements | `array<string>` |
143+
| `pg_query_scan(string $sql)` | Scan SQL into tokens | `string` (protobuf) |
144+
| `pg_query_deparse(string $protobuf)` | Convert protobuf AST back to SQL | `string` |
145+
| `pg_query_deparse_opts(...)` | Deparse with formatting options | `string` |
146+
| `pg_query_summary(string $sql, int $options, int $truncate)` | Generate query summary | `string` (protobuf) |
145147

146148
### pg_query_deparse_opts Parameters
147149

@@ -206,15 +208,18 @@ make test
206208

207209
## Architecture
208210

209-
The extension is built on top of [libpg_query](https://github.com/pganalyze/libpg_query), which extracts PostgreSQL's query parser into a standalone library. This means you get the exact same SQL parsing behavior as PostgreSQL itself.
211+
The extension is built on top of [libpg_query](https://github.com/pganalyze/libpg_query), which extracts PostgreSQL's
212+
query parser into a standalone library. This means you get the exact same SQL parsing behavior as PostgreSQL itself.
210213

211214
Key implementation details:
215+
212216
- **Static linking**: libpg_query.a is statically linked into the extension
213217
- **Build dependency**: Requires `protobuf-c` library for compilation (libpg_query uses protobuf internally)
214218
- **Auto-download**: The build system automatically downloads the correct libpg_query version
215219

216220
## See Also
217221

218-
- [pg-query library](/documentation/components/libs/pg-query.md) - Higher-level PHP wrapper with strongly-typed AST nodes
222+
- [pg-query library](/documentation/components/libs/pg-query.md) - Higher-level PHP wrapper with strongly-typed AST
223+
nodes
219224
- [libpg_query](https://github.com/pganalyze/libpg_query) - The underlying C library
220225
- [Nix Development Environment](/documentation/contributing/nix.md) - Using nix-shell for development

phpdoc/adapter.postgresql.xml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
<?xml version="1.0" encoding="UTF-8" ?>
2+
<phpdocumentor
3+
configVersion="3"
4+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
5+
xmlns="https://www.phpdoc.org"
6+
>
7+
<title>Flow PHP</title>
8+
<paths>
9+
<output>./../web/landing/build/documentation/api/adapter/postgresql</output>
10+
<cache>./../var/phpdocumentor/cache/adapter/postgresql</cache>
11+
</paths>
12+
<version number="1.x">
13+
<api format="php">
14+
<source dsn="./../">
15+
<path>src/adapter/etl-adapter-postgresql/src</path>
16+
</source>
17+
<output>postgresql</output>
18+
<default-package-name>PostgreSQL Adapter</default-package-name>
19+
<visibility>public</visibility>
20+
<include-source>false</include-source>
21+
</api>
22+
</version>
23+
<setting name="template.color" value="orange"/>
24+
</phpdocumentor>

0 commit comments

Comments
 (0)