Skip to content

Commit a6ed0bd

Browse files
committed
feat: add custom delimiter to csv_agg
Addresses the problem on PostgREST/postgrest#1102 Adds an overloaded `csv_agg` function: ```sql SELECT csv_agg(x, '|') AS body FROM projects x; ```
1 parent 1748da4 commit a6ed0bd

File tree

9 files changed

+169
-29
lines changed

9 files changed

+169
-29
lines changed

.github/workflows/ci.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ jobs:
3737
loadtest:
3838
strategy:
3939
matrix:
40-
kind: ['csv_agg', 'postgrest']
40+
kind: ['csv_agg', 'csv_agg_delim', 'postgrest']
4141
name: Loadtest
4242
runs-on: ubuntu-24.04
4343
steps:

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@ results/
77
*.diffs
88
pgbench_log.*
99
.history
10+
pg_csv--*.sql

README.md

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,25 @@
44
[![Coverage Status](https://coveralls.io/repos/github/PostgREST/pg_csv/badge.svg)](https://coveralls.io/github/PostgREST/pg_csv)
55
[![Tests](https://github.com/PostgREST/pg_csv/actions/workflows/ci.yaml/badge.svg)](https://github.com/PostgREST/pg_csv/actions)
66

7+
## Installation
8+
9+
Clone this repo and run:
10+
11+
```bash
12+
make && make install
13+
```
14+
15+
To install the extension:
16+
17+
```psql
18+
create extension pg_csv;
19+
```
20+
721
## csv_agg
822

923
Aggregate that builds a CSV as per [RFC 4180](https://www.ietf.org/rfc/rfc4180.txt), quoting as required.
1024

11-
```
25+
```psql
1226
select csv_agg(x) from projects x;
1327
csv_agg
1428
-------------------
@@ -20,3 +34,21 @@ select csv_agg(x) from projects x;
2034
5,Orphan,
2135
(1 row)
2236
```
37+
38+
It also supports adding a custom delimiter.
39+
40+
```psql
41+
select csv_agg(x, '|') from projects x;
42+
csv_agg
43+
-------------------
44+
id|name|client_id+
45+
1|Windows 7|1 +
46+
2|Windows 10|1 +
47+
3|IOS|2 +
48+
4|OSX|2 +
49+
5|Orphan|
50+
(1 row)
51+
```
52+
53+
> [!IMPORTANT]
54+
> Newline, carriage return and double quotes are not supported as delimiters to maintain the integrity of the separated values format.

bench/csv_agg_delim.sql

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
\set lim random(1000, 2000)
2+
3+
select csv_agg(t,'|') from (
4+
select * from student_emotion_assessments limit :lim
5+
) as t;

sql/pg_csv--0.1.sql

Lines changed: 0 additions & 15 deletions
This file was deleted.

sql/pg_csv.sql

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,26 @@ create function csv_agg_transfn(internal, anyelement)
33
language c
44
as 'pg_csv';
55

6+
create function csv_agg_transfn(internal, anyelement, "char")
7+
returns internal
8+
language c
9+
as 'pg_csv';
10+
611
create function csv_agg_finalfn(internal)
712
returns text
813
language c
914
as 'pg_csv';
1015

16+
create aggregate csv_agg(anyelement, "char") (
17+
sfunc = csv_agg_transfn,
18+
stype = internal,
19+
finalfunc = csv_agg_finalfn,
20+
parallel = safe
21+
);
22+
1123
create aggregate csv_agg(anyelement) (
12-
sfunc = csv_agg_transfn,
13-
stype = internal,
14-
finalfunc = csv_agg_finalfn
24+
sfunc = csv_agg_transfn,
25+
stype = internal,
26+
finalfunc = csv_agg_finalfn,
27+
parallel = safe
1528
);

src/pg_csv.c

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
PG_MODULE_MAGIC;
44

55
static const char NEWLINE = '\n';
6-
static const char DELIMITER = ',';
76
static const char DQUOTE = '"';
87
static const char CR = '\r';
98

@@ -14,17 +13,21 @@ typedef struct {
1413
TupleDesc tupdesc;
1514
} CsvAggState;
1615

16+
static inline bool is_reserved(char c){
17+
return c == DQUOTE || c == NEWLINE || c == CR;
18+
}
19+
1720
// Any comma, quote, CR, LF requires quoting as per RFC https://www.ietf.org/rfc/rfc4180.txt
18-
static inline bool needs_quote(const char *s, size_t n) {
21+
static inline bool needs_quote(const char *s, size_t n, char delim) {
1922
while (n--) {
2023
char c = *s++;
21-
if (c == DELIMITER || c == DQUOTE || c == NEWLINE || c == CR) return true;
24+
if (c == delim || is_reserved(c)) return true;
2225
}
2326
return false;
2427
}
2528

26-
static inline void csv_append_field(StringInfo buf, const char *s, size_t n) {
27-
if (!needs_quote(s, n)) {
29+
static inline void csv_append_field(StringInfo buf, const char *s, size_t n, char delim) {
30+
if (!needs_quote(s, n, delim)) {
2831
appendBinaryStringInfo(buf, s, n);
2932
} else {
3033
appendStringInfoChar(buf, DQUOTE);
@@ -72,6 +75,15 @@ Datum csv_agg_transfn(PG_FUNCTION_ARGS) {
7275

7376
HeapTupleHeader next = PG_GETARG_HEAPTUPLEHEADER(1);
7477

78+
char delim = PG_NARGS() >= 3 && !PG_ARGISNULL(2)?
79+
PG_GETARG_CHAR(2) :
80+
',';
81+
82+
if (is_reserved(delim))
83+
ereport(ERROR,
84+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
85+
errmsg("delimiter cannot be newline, carriage return or double quote")));
86+
7587
// build header and cache tupdesc once
7688
if (!state->header_done) {
7789
TupleDesc tdesc = lookup_rowtype_tupdesc(HeapTupleHeaderGetTypeId(next), HeapTupleHeaderGetTypMod(next));
@@ -83,10 +95,10 @@ Datum csv_agg_transfn(PG_FUNCTION_ARGS) {
8395
continue;
8496

8597
if (i > 0) // only append delimiter after the first value
86-
appendStringInfoChar(&state->accum_buf, DELIMITER);
98+
appendStringInfoChar(&state->accum_buf, delim);
8799

88100
char *cstr = NameStr(att->attname);
89-
csv_append_field(&state->accum_buf, cstr, strlen(cstr));
101+
csv_append_field(&state->accum_buf, cstr, strlen(cstr), delim);
90102
}
91103

92104
appendStringInfoChar(&state->accum_buf, NEWLINE);
@@ -119,12 +131,12 @@ Datum csv_agg_transfn(PG_FUNCTION_ARGS) {
119131
if (att->attisdropped) // pg always keeps dropped columns, guard against this
120132
continue;
121133

122-
if (i > 0) appendStringInfoChar(&state->accum_buf, DELIMITER);
134+
if (i > 0) appendStringInfoChar(&state->accum_buf, delim);
123135

124136
if (nulls[i]) continue; // empty field for NULL
125137

126138
char *cstr = datum_to_cstring(datums[i], att->atttypid);
127-
csv_append_field(&state->accum_buf, cstr, strlen(cstr));
139+
csv_append_field(&state->accum_buf, cstr, strlen(cstr), delim);
128140
}
129141

130142
PG_RETURN_POINTER(state);

test/expected/delimiters.out

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
-- semicolon delimiter
2+
SELECT csv_agg(x, ';') AS body
3+
FROM projects x;
4+
body
5+
-------------------------------
6+
id;name;client_id +
7+
1;Windows 7;1 +
8+
2;has,comma;1 +
9+
;; +
10+
4;OSX;2 +
11+
;"has""quote"; +
12+
5;"has,comma and ""quote""";7+
13+
6;"has +
14+
LF";7 +
15+
7;"has \r CR";8 +
16+
8;"has \r +
17+
CRLF""";8
18+
(1 row)
19+
20+
-- pipe delimiter
21+
SELECT csv_agg(x, '|') AS body
22+
FROM projects x;
23+
body
24+
-------------------------------
25+
id|name|client_id +
26+
1|Windows 7|1 +
27+
2|has,comma|1 +
28+
|| +
29+
4|OSX|2 +
30+
|"has""quote"| +
31+
5|"has,comma and ""quote"""|7+
32+
6|"has +
33+
LF"|7 +
34+
7|"has \r CR"|8 +
35+
8|"has \r +
36+
CRLF"""|8
37+
(1 row)
38+
39+
-- tab delimiter
40+
SELECT csv_agg(x, E'\t') AS body
41+
FROM projects x;
42+
body
43+
-------------------------------------------
44+
id name client_id +
45+
1 Windows 7 1 +
46+
2 has,comma 1 +
47+
+
48+
4 OSX 2 +
49+
"has""quote" +
50+
5 "has,comma and ""quote""" 7+
51+
6 "has +
52+
LF" 7 +
53+
7 "has \r CR" 8 +
54+
8 "has \r +
55+
CRLF""" 8
56+
(1 row)
57+
58+
-- newline is forbidden as delimiter
59+
SELECT csv_agg(x, E'\n') AS body
60+
FROM projects x;
61+
ERROR: delimiter cannot be newline, carriage return or double quote
62+
-- double quote is forbidden as delimiter
63+
SELECT csv_agg(x, '"') AS body
64+
FROM projects x;
65+
ERROR: delimiter cannot be newline, carriage return or double quote
66+
-- carriage return is forbidden as delimiter
67+
SELECT csv_agg(x, E'\r') AS body
68+
FROM projects x;
69+
ERROR: delimiter cannot be newline, carriage return or double quote

test/sql/delimiters.sql

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
-- semicolon delimiter
2+
SELECT csv_agg(x, ';') AS body
3+
FROM projects x;
4+
5+
-- pipe delimiter
6+
SELECT csv_agg(x, '|') AS body
7+
FROM projects x;
8+
9+
-- tab delimiter
10+
SELECT csv_agg(x, E'\t') AS body
11+
FROM projects x;
12+
13+
-- newline is forbidden as delimiter
14+
SELECT csv_agg(x, E'\n') AS body
15+
FROM projects x;
16+
17+
-- double quote is forbidden as delimiter
18+
SELECT csv_agg(x, '"') AS body
19+
FROM projects x;
20+
21+
-- carriage return is forbidden as delimiter
22+
SELECT csv_agg(x, E'\r') AS body
23+
FROM projects x;

0 commit comments

Comments
 (0)