Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 43 additions & 9 deletions clojure/src/pgloader/load_file/ast.clj
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
(ns pgloader.load-file.ast
(:require [instaparse.core :as insta]
[clojure.string :as str]
[clojure.tools.logging :as log]
[pgloader.pg-service :as pg-service]
[pgloader.mysql-options :as mysql-opts])
(:import [java.net URI]))
Expand Down Expand Up @@ -449,6 +450,7 @@
:csv-header {:csv-header true}
:csv-escape-mode {:escape-mode :following}
:lines-terminated {:lines-terminated (interpret-escape (second (second node)))}
:date-format {:date-format (second (second node))}
:drop-indexes {:drop-indexes true}
nil))))

Expand Down Expand Up @@ -534,6 +536,19 @@
:target-type target-type
:using using-expr}})))

(defn- date-time-target-type?
  "Return true when `target-type` names a PostgreSQL date/time type.

  The comparison ignores case. Before the lookup the name is normalized:
  an optional precision modifier such as `(6)` is dropped, runs of
  whitespace are collapsed to a single space and surrounding blanks are
  trimmed. `TIMESTAMP(3) WITH TIME ZONE` is therefore recognized just
  like `timestamptz`.

  Returns false for nil and for every other type name."
  [target-type]
  (let [normalized (some-> target-type
                           str/lower-case
                           ;; time(p) / timestamp(p): the precision does not
                           ;; change which family the type belongs to.
                           (str/replace #"\s*\(\s*\d+\s*\)" "")
                           (str/replace #"\s+" " ")
                           str/trim)]
    (contains? #{"date"
                 "time"
                 "time without time zone"
                 "time with time zone"
                 "timetz"
                 "timestamp"
                 "timestamp without time zone"
                 "timestamp with time zone"
                 "timestamptz"}
               normalized)))

(defn transform
"Transform an instaparse hiccup tree into a LoadCommand record.
The tree comes from the instaparse parser with :string-ci true,
Expand Down Expand Up @@ -643,15 +658,33 @@
after-load (when after-load-node
(let [commands (rest (second after-load-node))]
(mapv #(second %) (filter vector? commands))))
cols (when source-col-list
(mapv :name (map parse-column-item
(filter #(= :column-item (first %))
(rest source-col-list)))))
col-formats (when source-col-list
(seq (filter :date-format
(map parse-column-item
(filter #(= :column-item (first %))
(rest source-col-list))))))
source-column-items (when source-col-list
(map parse-column-item
(filter #(= :column-item (first %))
(rest source-col-list))))
cols (when source-column-items
(mapv :name source-column-items))
explicit-col-formats (seq (filter :date-format source-column-items))
target-projections (seq (or (:projections tt-table)
tt-projections))
default-col-formats (when (and cols
(:date-format csv-options))
(let [explicit-names (set (map :name explicit-col-formats))
typed-by-name (into {}
(keep (fn [{:keys [column-name target-type]}]
(when (date-time-target-type? target-type)
[column-name target-type]))
target-projections))]
(if (seq typed-by-name)
(keep
(fn [col-name]
(when (and (not (contains? explicit-names col-name))
(get typed-by-name col-name))
{:name col-name
:date-format (:date-format csv-options)}))
cols)
(log/warn "WITH date format was specified, but no target date/time columns were annotated; add TARGET TABLE column types to apply it."))))
col-formats (seq (concat default-col-formats explicit-col-formats))
col-nullifs (when source-col-list
(seq (filter (comp seq :nullifs)
(map parse-column-item
Expand Down Expand Up @@ -1041,6 +1074,7 @@
:create-tables (assoc acc :create-tables true)
:create-no-tables (assoc acc :create-tables false)
:fixed-header (assoc acc :fixed-header true)
:date-format (assoc acc :date-format (second (second opt)))
:drop-indexes (assoc acc :drop-indexes true)
:disable-triggers (assoc acc :disable-triggers true)
acc))
Expand Down
5 changes: 3 additions & 2 deletions clojure/src/pgloader/load_file/grammar.clj
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@

with-fixed-clause = <'WITH'> <ws> fixed-option (<opt-ws> <','> <opt-ws> fixed-option)*
fixed-option = truncate | create-tables | create-table | create-no-tables | batch-rows
| batch-size | batch-concurrency | disable-triggers | drop-indexes | fixed-header
| batch-size | batch-concurrency | disable-triggers | drop-indexes | fixed-header | date-format
fixed-header = <'fixed'> <ws> <'header'>

load-archive = <'LOAD'> <ws> <'ARCHIVE'>
Expand Down Expand Up @@ -165,7 +165,8 @@
table-name = #'[a-zA-Z_][a-zA-Z0-9_-]*' | <'\"'> #'[^\"]+' <'\"'>

with-csv-clause = <'WITH'> <ws> csv-option (<opt-ws> <','> <opt-ws> csv-option)*
csv-option = skip-header | fields-enclosed | fields-terminated | fields-escaped | fields-not-enclosed | csv-encoding | create-tables | create-no-tables | nullif | keep-unquoted-blanks | trim-unquoted-blanks | truncate | disable-triggers | batch-rows | batch-size | batch-concurrency | csv-header | lines-terminated | csv-escape-mode | drop-indexes
csv-option = skip-header | fields-enclosed | fields-terminated | fields-escaped | fields-not-enclosed | csv-encoding | create-tables | create-no-tables | nullif | keep-unquoted-blanks | trim-unquoted-blanks | truncate | disable-triggers | batch-rows | batch-size | batch-concurrency | csv-header | lines-terminated | csv-escape-mode | date-format | drop-indexes
date-format = <'date'> <ws> <'format'> <ws> quoted-string
drop-indexes = <'drop'> <ws> <'indexes'>
csv-encoding = <'encoding'> <ws> quoted-string
skip-header = <'skip'> <ws> <'header'> <opt-ws> <'='> <opt-ws> integer
Expand Down
51 changes: 51 additions & 0 deletions clojure/test/pgloader/load_file/parser_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,57 @@
;; verify the INLINE data contains test rows
(is (str/includes? (:inline-data source) "10-02-1999 00-33-12.123456"))))))

;; Checks how a command-level `WITH date format` combines with a per-field
;; `[date format ...]` annotation in a LOAD CSV command. The command below
;; declares three source fields and a typed TARGET TABLE column list; the
;; assertions then inspect the entries found under [:source :column-formats].
(deftest test-parse-csv-with-default-date-format
(testing "WITH date format applies to typed target date columns and explicit field formats override it"
(let [result (parser/parse-string
"LOAD CSV FROM '/data/dates.csv'
(id, created_at, closed_at [date format 'DD/MM/YYYY'])
INTO postgresql:///target
TARGET TABLE public.events
(id integer, created_at timestamptz, closed_at date)
WITH date format 'YYYY-MM-DD HH24-MI-SS.US';")]
;; The command has to parse before any format can be inspected.
(is (:ok result) (str "Parse failed: " (:error result)))
(let [cmd (:ok result)
formats (get-in cmd [:source :column-formats])]
;; created_at is typed timestamptz and has no per-field format, so it is
;; expected to carry the WITH-level format.
(is (= "YYYY-MM-DD HH24-MI-SS.US"
(some #(when (= "created_at" (:name %)) (:date-format %))
formats)))
;; closed_at declares its own format, which is expected to win over the
;; WITH-level one.
(is (= "DD/MM/YYYY"
(some #(when (= "closed_at" (:name %)) (:date-format %))
formats)))
;; id is typed integer, so no date format is expected for it.
(is (nil? (some #(when (= "id" (:name %)) (:date-format %))
formats)))))))

;; Checks that the WITH-level date format is attached by column name rather
;; than by position. The source field list is (id, name, created_at) while the
;; TARGET TABLE list is (id, created_at, name), so the second position holds a
;; text field on the source side and a timestamptz column on the target side.
(deftest test-parse-csv-default-date-format-matches-target-names
(testing "WITH date format does not fall back to target projection positions"
(let [result (parser/parse-string
"LOAD CSV FROM '/data/dates.csv'
(id, name, created_at)
INTO postgresql:///target
TARGET TABLE public.events
(id integer, created_at timestamptz, name text)
WITH date format 'YYYY-MM-DD';")]
;; The command has to parse before any format can be inspected.
(is (:ok result) (str "Parse failed: " (:error result)))
(let [formats (get-in result [:ok :source :column-formats])]
;; created_at (typed timestamptz) is expected to receive the format...
(is (= "YYYY-MM-DD"
(some #(when (= "created_at" (:name %)) (:date-format %))
formats)))
;; ...while name (typed text) must not, even though it shares a position
;; with created_at in the other list.
(is (nil? (some #(when (= "name" (:name %)) (:date-format %))
formats)))))))

;; Checks that a LOAD FIXED command accepts `date format` in its WITH clause
;; and that the parsed command exposes the format string under
;; [:with-options :date-format].
(deftest test-parse-fixed-with-date-format
(testing "LOAD FIXED accepts WITH date format"
(let [result (parser/parse-string
"LOAD FIXED FROM fixed:///data/events.dat
(id from 0 for 2, created_at from 2 for 10)
INTO postgresql:///target
WITH date format 'YYYY-MM-DD';")]
;; The command has to parse before its options can be inspected.
(is (:ok result) (str "Parse failed: " (:error result)))
(let [cmd (:ok result)]
;; The command is expected to be recognized as a fixed-width load.
(is (= :fixed (:load-type cmd)))
(is (= "YYYY-MM-DD"
(get-in cmd [:with-options :date-format])))))))

;; ── CSV null-if tests (issues #1135, #1221) ─────────────────────────────────

(deftest test-parse-csv-null-if-blanks-per-column
Expand Down
12 changes: 11 additions & 1 deletion docs/ref/csv.rst
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,17 @@ When loading from a `CSV` file, the following options are supported:
names to be found in the CSV file, using the same CSV parameters as
for the CSV data.

- *date format*

Takes a date format string as argument. When target columns are of a
PostgreSQL date/time type, this option applies the format by default to
the source fields whose names match those columns. A per-field
*date format* specification still takes precedence for that field.

Here's an example of a *WITH* date format specification::

WITH date format 'YYYY-MM-DD HH24-MI-SS.US'

- *trim unquoted blanks*

When reading unquoted values in the `CSV` file, remove the blanks
Expand Down Expand Up @@ -259,4 +270,3 @@ When loading from a `CSV` file, the following options are supported:

This character is used to recognize *end-of-line* condition when
reading the `CSV` data.

12 changes: 11 additions & 1 deletion docs/ref/fixed.rst
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,17 @@ Fixed File Format Loading Options: WITH

When loading from a `FIXED` file, the following options are supported:

- *date format*

Takes a date format string as argument. When target columns are of a
PostgreSQL date/time type, this option applies the format by default to
the source fields whose names match those columns. A per-field
*date format* specification still takes precedence for that field.

Here's an example of a *WITH* date format specification::

WITH date format 'YYYY-MM-DD HH24-MI-SS.US'

- *truncate*

When this option is listed, pgloader issues a `TRUNCATE` command
Expand All @@ -201,4 +212,3 @@ When loading from a `FIXED` file, the following options are supported:

Takes a numeric value as argument. Instruct pgloader to skip that
many lines at the beginning of the input file.

1 change: 1 addition & 0 deletions src/package.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,7 @@
#:encoding
#:skip-lines
#:header
#:date-format

;; md-copy protocol/api
#:parse-header
Expand Down
1 change: 1 addition & 0 deletions src/parsers/command-csv.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@
option-trim-unquoted-blanks
option-keep-unquoted-blanks
option-csv-escape-mode
option-date-format
option-null-if))

(defrule csv-options (and kw-with
Expand Down
2 changes: 2 additions & 0 deletions src/parsers/command-fixed.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
option-disable-triggers
option-identifiers-case
option-skip-header
option-date-format
option-fixed-header))

(defrule fixed-options (and kw-with
Expand Down Expand Up @@ -148,6 +149,7 @@
:encoding ,encoding
:fields ',fields
:columns ',columns
:date-format ,(getf options :date-format)
:skip-lines ,(or (getf options :skip-lines) 0)
:header ,(getf options :header))))

Expand Down
4 changes: 3 additions & 1 deletion src/sources/common/api.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@
:initform 0) ;
(header :accessor header ; CSV headers are col names
:initarg :header ;
:initform nil) ;
(date-format :accessor date-format ; default date format
:initarg :date-format ;
:initform nil)) ;
(:documentation "pgloader Multiple Files Data Source (csv, fixed, copy)."))

Expand Down Expand Up @@ -185,4 +188,3 @@

(defgeneric drop-matviews (matview-list db-copy)
(:documentation "Drop Materialized Views."))

6 changes: 4 additions & 2 deletions src/sources/common/md-methods.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
fields to columns projections (mapping)."
(reformat-then-process :fields (fields copy)
:columns (columns copy)
:target (target copy)))
:target (target copy)
:date-format (date-format copy)))

(defmethod copy-column-list ((copy md-copy))
"We did reformat-then-process the column list, so we now send them in the
Expand All @@ -39,7 +40,8 @@
(make-list (length (columns copy))))
:encoding (encoding copy)
:skip-lines (skip-lines copy)
:header (header copy)))
:header (header copy)
:date-format (date-format copy)))

(defmethod map-rows ((copy md-copy) &key process-row-fn)
"Load data from a text file in CSV format, with support for advanced
Expand Down
65 changes: 57 additions & 8 deletions src/sources/common/project-fields.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,48 @@
;;;
(in-package #:pgloader.sources)

(defun project-fields (&key fields columns (compile t))
(defun date/time-type-name-p (type-name)
  "Tell whether TYPE-NAME designates one of PostgreSQL's date/time types.

   TYPE-NAME is lower-cased before being compared, so the check ignores
   case. The result is a generalized boolean: the tail of the list of known
   names starting at the match, or NIL when nothing matches (including when
   TYPE-NAME is NIL)."
  (let ((known-names '("date"
                       "time"
                       "time without time zone"
                       "time with time zone"
                       "timetz"
                       "timestamp"
                       "timestamp without time zone"
                       "timestamp with time zone"
                       "timestamptz"))
        (candidate (and type-name (string-downcase type-name))))
    (member candidate known-names :test #'string=)))

(defun column-type-name-string (column)
  "Give back the type name of COLUMN as a string.

   The value read with COLUMN-TYPE-NAME is either an SQLTYPE object, in
   which case its SQLTYPE-NAME is returned, or a string, which is returned
   unchanged. Any other value yields NIL."
  (let ((declared-type (column-type-name column)))
    (cond ((typep declared-type 'sqltype) (sqltype-name declared-type))
          ((stringp declared-type)        declared-type)
          (t                              nil))))

(defun target-date/time-column-names (target)
  "List the names of the columns of TARGET whose type is a date/time type.

   Those are the columns a default date format should apply to. When TARGET
   is not a TABLE object the result is NIL."
  (and (typep target 'table)
       (mapcar #'column-name
               (remove-if-not (lambda (candidate)
                                (date/time-type-name-p
                                 (column-type-name-string candidate)))
                              (table-column-list target)))))

(defun project-fields (&key fields columns target date-format (compile t))
"The simplest projection happens when both FIELDS and COLS are nil: in
this case the projection is an identity, we simply return what we got.

Other forms of projections consist of forming columns with the result of
applying a transformation function. In that case a cols entry is a list
of '(colname type expression), the expression being the (already
compiled) function to use here."
(labels ((null-as-match-p (null-as col)
(let* ((global-date-format date-format)
(target-date/time-column-names
(when global-date-format
(target-date/time-column-names target))))
(labels ((null-as-match-p (null-as col)
"Return T if COL matches one NULL-AS spec (:blanks or a string)."
(if (eq null-as :blanks)
(every (lambda (char) (char= char #\Space)) col)
Expand All @@ -32,6 +65,16 @@
(loop :for (k v) :on plist :by #'cddr
:when (eq k :null-as) :collect v))

(field-name (field-name-or-list)
(typecase field-name-or-list
(list (car field-name-or-list))
(t field-name-or-list)))

(target-date/time-field-p (field-name-or-list)
(member (field-name field-name-or-list)
target-date/time-column-names
:test #'string-equal))

(field-name-as-symbol (field-name-or-list)
"we need to deal with symbols as we generate code"
(typecase field-name-or-list
Expand All @@ -51,6 +94,10 @@
trim-right
&allow-other-keys)
plist
(let ((date-format (or date-format
(when (target-date/time-field-p
field-name-or-list)
global-date-format))))
;; now prepare a function of a column
(lambda (col)
(let ((value-or-null
Expand All @@ -69,9 +116,9 @@
(if date-format
(parse-date-string value-or-null
(parse-date-format date-format))
value-or-null)))))))))
value-or-null))))))))))

(let* ((projection
(let* ((projection
(cond
;; when no specific information has been given on FIELDS and
;; COLUMNS, just apply generic NULL-AS processing
Expand Down Expand Up @@ -133,14 +180,17 @@
(declare (ignorable ,@args))
(vector ,@newrow)))))))))
;; allow for some debugging
(if compile (compile nil projection) projection))))
(if compile (compile nil projection) projection)))))

(defun reformat-then-process (&key fields columns target)
(defun reformat-then-process (&key fields columns target date-format)
"Return a lambda form to apply to each row we read.

The lambda closes over the READ paramater, which is a counter of how many
lines we did read in the file."
(let ((projection (project-fields :fields fields :columns columns)))
(let ((projection (project-fields :fields fields
:columns columns
:target target
:date-format date-format)))
(lambda (row)
;; cl-csv returns (nil) for an empty line
(if (or (null row)
Expand All @@ -152,4 +202,3 @@
(condition (e)
(update-stats :data target :errs 1)
(log-message :error "Could not read input: ~a" e)))))))

Loading