diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 8a98069..c55b6b8 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -37,7 +37,7 @@ jobs: loadtest: strategy: matrix: - kind: ['csv_agg', 'postgrest'] + kind: ['csv_agg', 'csv_agg_delim', 'postgrest'] name: Loadtest runs-on: ubuntu-24.04 steps: diff --git a/.gitignore b/.gitignore index e9e5914..457a4a7 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ results/ *.diffs pgbench_log.* .history +pg_csv--*.sql diff --git a/README.md b/README.md index 3058771..cdf7b39 100644 --- a/README.md +++ b/README.md @@ -4,11 +4,25 @@ [![Coverage Status](https://coveralls.io/repos/github/PostgREST/pg_csv/badge.svg)](https://coveralls.io/github/PostgREST/pg_csv) [![Tests](https://github.com/PostgREST/pg_csv/actions/workflows/ci.yaml/badge.svg)](https://github.com/PostgREST/pg_csv/actions) +## Installation + +Clone this repo and run: + +```bash +make && make install +``` + +Then enable the extension in your database: + +```psql +create extension pg_csv; +``` + ## csv_agg Aggregate that builds a CSV as per [RFC 4180](https://www.ietf.org/rfc/rfc4180.txt), quoting as required. -``` +```psql select csv_agg(x) from projects x; csv_agg ------------------- @@ -20,3 +34,21 @@ select csv_agg(x) from projects x; 5,Orphan, (1 row) ``` + +It also accepts a custom single-character delimiter as an optional second argument. + +```psql +select csv_agg(x, '|') from projects x; + csv_agg +------------------- + id|name|client_id+ + 1|Windows 7|1 + + 2|Windows 10|1 + + 3|IOS|2 + + 4|OSX|2 + + 5|Orphan| +(1 row) +``` + +> [!IMPORTANT] +> Newline, carriage return, and double quote are rejected as delimiters, because the CSV format reserves them for separating rows and quoting fields. 
diff --git a/bench/csv_agg_delim.sql b/bench/csv_agg_delim.sql new file mode 100644 index 0000000..0b7836d --- /dev/null +++ b/bench/csv_agg_delim.sql @@ -0,0 +1,5 @@ +\set lim random(1000, 2000) + +select csv_agg(t,'|') from ( + select * from student_emotion_assessments limit :lim +) as t; diff --git a/sql/pg_csv--0.1.sql b/sql/pg_csv--0.1.sql deleted file mode 100644 index cad5e84..0000000 --- a/sql/pg_csv--0.1.sql +++ /dev/null @@ -1,15 +0,0 @@ -create function csv_agg_transfn(internal, anyelement) - returns internal - language c - as 'pg_csv'; - -create function csv_agg_finalfn(internal) - returns text - language c - as 'pg_csv'; - -create aggregate csv_agg(anyelement) ( - sfunc = csv_agg_transfn, - stype = internal, - finalfunc = csv_agg_finalfn -); diff --git a/sql/pg_csv.sql b/sql/pg_csv.sql index cad5e84..20fb04a 100644 --- a/sql/pg_csv.sql +++ b/sql/pg_csv.sql @@ -3,13 +3,26 @@ create function csv_agg_transfn(internal, anyelement) language c as 'pg_csv'; +create function csv_agg_transfn(internal, anyelement, "char") + returns internal + language c + as 'pg_csv'; + create function csv_agg_finalfn(internal) returns text language c as 'pg_csv'; +create aggregate csv_agg(anyelement, "char") ( + sfunc = csv_agg_transfn, + stype = internal, + finalfunc = csv_agg_finalfn, + parallel = safe +); + create aggregate csv_agg(anyelement) ( - sfunc = csv_agg_transfn, - stype = internal, - finalfunc = csv_agg_finalfn + sfunc = csv_agg_transfn, + stype = internal, + finalfunc = csv_agg_finalfn, + parallel = safe ); diff --git a/src/pg_csv.c b/src/pg_csv.c index 207ff39..f94310c 100644 --- a/src/pg_csv.c +++ b/src/pg_csv.c @@ -2,10 +2,9 @@ PG_MODULE_MAGIC; -static const char NEWLINE = '\n'; -static const char DELIMITER = ','; -static const char DQUOTE = '"'; -static const char CR = '\r'; +static const char NEWLINE = '\n'; +static const char DQUOTE = '"'; +static const char CR = '\r'; typedef struct { StringInfoData accum_buf; @@ -14,17 +13,21 @@ typedef struct { TupleDesc 
tupdesc; } CsvAggState; +static inline bool is_reserved(char c) { + return c == DQUOTE || c == NEWLINE || c == CR; +} + // Any comma, quote, CR, LF requires quoting as per RFC https://www.ietf.org/rfc/rfc4180.txt -static inline bool needs_quote(const char *s, size_t n) { +static inline bool needs_quote(const char *s, size_t n, char delim) { while (n--) { char c = *s++; - if (c == DELIMITER || c == DQUOTE || c == NEWLINE || c == CR) return true; + if (c == delim || is_reserved(c)) return true; } return false; } -static inline void csv_append_field(StringInfo buf, const char *s, size_t n) { - if (!needs_quote(s, n)) { +static inline void csv_append_field(StringInfo buf, const char *s, size_t n, char delim) { + if (!needs_quote(s, n, delim)) { appendBinaryStringInfo(buf, s, n); } else { appendStringInfoChar(buf, DQUOTE); @@ -72,6 +75,10 @@ Datum csv_agg_transfn(PG_FUNCTION_ARGS) { HeapTupleHeader next = PG_GETARG_HEAPTUPLEHEADER(1); + char delim = PG_NARGS() >= 3 && !PG_ARGISNULL(2) ? PG_GETARG_CHAR(2) : ','; + + if (is_reserved(delim)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("delimiter cannot be newline, carriage return or double quote"))); + // build header and cache tupdesc once if (!state->header_done) { TupleDesc tdesc = lookup_rowtype_tupdesc(HeapTupleHeaderGetTypeId(next), HeapTupleHeaderGetTypMod(next)); @@ -83,10 +90,10 @@ Datum csv_agg_transfn(PG_FUNCTION_ARGS) { continue; if (i > 0) // only append delimiter after the first value - appendStringInfoChar(&state->accum_buf, DELIMITER); + appendStringInfoChar(&state->accum_buf, delim); char *cstr = NameStr(att->attname); - csv_append_field(&state->accum_buf, cstr, strlen(cstr)); + csv_append_field(&state->accum_buf, cstr, strlen(cstr), delim); } appendStringInfoChar(&state->accum_buf, NEWLINE); @@ -119,12 +126,12 @@ Datum csv_agg_transfn(PG_FUNCTION_ARGS) { if (att->attisdropped) // pg always keeps dropped columns, guard against this continue; - if (i > 0) 
appendStringInfoChar(&state->accum_buf, DELIMITER); + if (i > 0) appendStringInfoChar(&state->accum_buf, delim); if (nulls[i]) continue; // empty field for NULL char *cstr = datum_to_cstring(datums[i], att->atttypid); - csv_append_field(&state->accum_buf, cstr, strlen(cstr)); + csv_append_field(&state->accum_buf, cstr, strlen(cstr), delim); } PG_RETURN_POINTER(state); diff --git a/src/pg_prelude.h b/src/pg_prelude.h index a49ca8c..d427ffe 100644 --- a/src/pg_prelude.h +++ b/src/pg_prelude.h @@ -41,10 +41,10 @@ #include #include #include -#include #include #include #include +#include #include #pragma GCC diagnostic pop diff --git a/test/expected/delimiters.out b/test/expected/delimiters.out new file mode 100644 index 0000000..df227f9 --- /dev/null +++ b/test/expected/delimiters.out @@ -0,0 +1,69 @@ +-- semicolon delimiter +SELECT csv_agg(x, ';') AS body +FROM projects x; + body +------------------------------- + id;name;client_id + + 1;Windows 7;1 + + 2;has,comma;1 + + ;; + + 4;OSX;2 + + ;"has""quote"; + + 5;"has,comma and ""quote""";7+ + 6;"has + + LF";7 + + 7;"has \r CR";8 + + 8;"has \r + + CRLF""";8 +(1 row) + +-- pipe delimiter +SELECT csv_agg(x, '|') AS body +FROM projects x; + body +------------------------------- + id|name|client_id + + 1|Windows 7|1 + + 2|has,comma|1 + + || + + 4|OSX|2 + + |"has""quote"| + + 5|"has,comma and ""quote"""|7+ + 6|"has + + LF"|7 + + 7|"has \r CR"|8 + + 8|"has \r + + CRLF"""|8 +(1 row) + +-- tab delimiter +SELECT csv_agg(x, E'\t') AS body +FROM projects x; + body +------------------------------------------- + id name client_id + + 1 Windows 7 1 + + 2 has,comma 1 + + + + 4 OSX 2 + + "has""quote" + + 5 "has,comma and ""quote""" 7+ + 6 "has + + LF" 7 + + 7 "has \r CR" 8 + + 8 "has \r + + CRLF""" 8 +(1 row) + +-- newline is forbidden as delimiter +SELECT csv_agg(x, E'\n') AS body +FROM projects x; +ERROR: delimiter cannot be newline, carriage return or double quote +-- double quote is forbidden as delimiter +SELECT csv_agg(x, '"') AS 
body +FROM projects x; +ERROR: delimiter cannot be newline, carriage return or double quote +-- carriage return is forbidden as delimiter +SELECT csv_agg(x, E'\r') AS body +FROM projects x; +ERROR: delimiter cannot be newline, carriage return or double quote diff --git a/test/sql/delimiters.sql b/test/sql/delimiters.sql new file mode 100644 index 0000000..6397233 --- /dev/null +++ b/test/sql/delimiters.sql @@ -0,0 +1,23 @@ +-- semicolon delimiter +SELECT csv_agg(x, ';') AS body +FROM projects x; + +-- pipe delimiter +SELECT csv_agg(x, '|') AS body +FROM projects x; + +-- tab delimiter +SELECT csv_agg(x, E'\t') AS body +FROM projects x; + +-- newline is forbidden as delimiter +SELECT csv_agg(x, E'\n') AS body +FROM projects x; + +-- double quote is forbidden as delimiter +SELECT csv_agg(x, '"') AS body +FROM projects x; + +-- carriage return is forbidden as delimiter +SELECT csv_agg(x, E'\r') AS body +FROM projects x;