Skip to content

Commit 66e7dc1

Browse files
authored
Add ability to specify modification value type in CopyIncrementally (#53)
Some databases do not automatically convert a string to the column's actual type: e.g. `timestamp_column >= '2020-01-01'` fails. This would break incremental loading, because that is exactly the kind of comparison we generated there. The new `modification_comparison_type` argument to `CopyIncrementally()` makes it possible to change this to `timestamp_column >= TIMESTAMP '2020-01-01'`. Also fix up the docstring for the `comparison_value_placeholder` argument, which should actually be named `comparison_placeholder`; now at least the docstring reads like that.
1 parent 434b935 commit 66e7dc1

File tree

1 file changed

+9
-3
lines changed

1 file changed

+9
-3
lines changed

mara_pipelines/commands/sql.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,8 @@ def __init__(self, source_db_alias: str, source_table: str,
216216
sql_file_name: Union[str, Callable] = None, sql_statement: Union[str, Callable] = None,
217217
target_db_alias: str = None, timezone: str = None, replace: {str: str} = None,
218218
use_explicit_upsert: bool = False,
219-
csv_format: bool = None, delimiter_char: str = None) -> None:
219+
csv_format: bool = None, delimiter_char: str = None,
220+
modification_comparison_type: str = None) -> None:
220221
"""
221222
Incrementally loads data from one database into another.
222223
@@ -233,7 +234,9 @@ def __init__(self, source_db_alias: str, source_table: str,
233234
sql_file_name: The path of a file name that is run to query the source database
234235
replace: A set of replacements to perform against the sql query
235236
modification_comparison: SQL expression that evaluates to a comparable value
236-
comparison_value_placeholder: A placeholder that is replaced with the last comparison value in the sql query
237+
modification_comparison_type: type of the saved (as string) modification_comparison value
238+
comparison_value_placeholder: A placeholder in the sql code that gets replaced with the
239+
actual incremental load comparison or `1=1`.
237240
target_db_alias: The database to write to
238241
target_table: The table for loading data into
239242
primary_keys: A combination of primary key columns that are used for upserting into the target table
@@ -244,6 +247,7 @@ def __init__(self, source_db_alias: str, source_table: str,
244247
self.source_db_alias = source_db_alias
245248
self.source_table = source_table
246249
self.modification_comparison = modification_comparison
250+
self.modification_comparison_type = modification_comparison_type
247251
self.comparison_value_placeholder = comparison_value_placeholder
248252

249253
self._target_db_alias = target_db_alias
@@ -326,8 +330,9 @@ def run(self) -> bool:
326330

327331
# perform the actual copy replacing the placeholder
328332
# with the comparison value from the latest successful execution
333+
modification_comparison_type = self.modification_comparison_type or ''
329334
replace = {self.comparison_value_placeholder:
330-
f'({self.modification_comparison} >= \'{last_comparison_value}\')'}
335+
f'({self.modification_comparison} >= {modification_comparison_type} \'{last_comparison_value}\')'}
331336
if not shell.run_shell_command(self._copy_command(self.target_table + '_upsert', replace)):
332337
return False
333338

@@ -393,6 +398,7 @@ def html_doc_items(self) -> [(str, str)]:
393398
('modification comparison', _.tt[self.modification_comparison])] \
394399
+ _SQLCommand.html_doc_items(self, self.source_db_alias) \
395400
+ [('comparison value placeholder', _.tt[self.comparison_value_placeholder]),
401+
('modification comparison type', _.tt[self.modification_comparison_type if self.modification_comparison_type else '(no cast)']),
396402
('target db', _.tt[self.target_db_alias]),
397403
('target table', _.tt[self.target_table]),
398404
('primary_keys', _.tt[repr(self.primary_keys)]),

0 commit comments

Comments
 (0)