88
99import mara_db .dbs
1010import mara_db .shell
11+ import mara_db .postgresql
1112from mara_page import _ , html
1213from .. import config , shell , pipelines
1314from ..incremental_processing import file_dependencies
@@ -43,6 +44,7 @@ def sql_file_path(self) -> pathlib.Path:
4344 pipeline_candidate = self
4445 while not isinstance (pipeline_candidate , pipelines .Pipeline ):
4546 pipeline_candidate = pipeline_candidate .parent
47+ assert isinstance (pipeline_candidate , pipelines .Pipeline )
4648 return pipeline_candidate .base_path () / self .sql_file_name
4749
4850 def shell_command (self ):
@@ -163,7 +165,7 @@ def target_db_alias(self):
163165 return self ._target_db_alias or config .default_db_alias ()
164166
165167 def file_path (self ) -> pathlib .Path :
166- return self .parent .parent .base_path () / self .file_name
168+ return self .parent .parent .base_path () / self .sql_file_name
167169
168170 def run (self ) -> bool :
169171 if self .sql_file_name :
@@ -185,7 +187,6 @@ def run(self) -> bool:
185187 # (see also above in ExecuteSQL)
186188 file_dependencies .delete (self .node_path (), dependency_type )
187189
188-
189190 if not super ().run ():
190191 return False
191192
@@ -266,7 +267,7 @@ def run(self) -> bool:
266267 # retrieve the highest current value for the modification comparison (e.g.: the highest timestamp)
267268 # We intentionally use the command line here (rather than sqlalchemy) to avoid forcing python drivers on people,
268269 # which can be hard for example in the case of SQL Server
269- logger .log (f'get highest modification comparison value' , format = logger .Format .ITALICS )
270+ logger .log (f'Get new max modification comparison value... ' , format = logger .Format .ITALICS )
270271 max_value_query = f'SELECT max({ self .modification_comparison } ) AS maxval FROM { self .source_table } '
271272 logger .log (max_value_query , format = logger .Format .VERBATIM )
272273 result = shell .run_shell_command (f'echo { shlex .quote (max_value_query )} \\ \n | '
@@ -275,30 +276,37 @@ def run(self) -> bool:
275276 if not result :
276277 return False
277278
279+ if isinstance (result , bool ):
280+ # This happens if the query above ran, but returned no data and therefore the load
281+ # query below would also return no data
282+ # We assume that this happens e.g. when there is no data *yet* and let the load succeed
283+ # without actually doing anything
284+ logger .log ("Found no data, not starting Copy." , format = logger .Format .VERBATIM )
285+ return True
278286 # be flexible with different output formats: remove the column header & remove whitespace & quotes
279287 max_modification_value = '' .join (result ).replace ('maxval' , '' ).strip ().strip ('"' )
280- logger .log (repr ( max_modification_value ) , format = logger .Format .VERBATIM )
288+ logger .log (f"New max modification comparison value: { max_modification_value !r } " , format = logger .Format .VERBATIM )
281289
282290 # check whether target table is empty
283291 target_table_is_empty = True
284292
285293 target_table_empty_query = f'SELECT TRUE FROM { self .target_table } LIMIT 1'
286- logger .log (f'check if target table is empty' , format = logger .Format .ITALICS )
294+ logger .log (f'Check if target table is empty' , format = logger .Format .ITALICS )
287295 logger .log (target_table_empty_query , format = logger .Format .VERBATIM )
288296 with mara_db .postgresql .postgres_cursor_context (self .target_db_alias ) as cursor :
289297 cursor .execute (f'SELECT TRUE FROM { self .target_table } LIMIT 1' )
290298 target_table_is_empty = not cursor .fetchone ()
291299 logger .log (f"target table{ '' if target_table_is_empty else ' not' } empty" , format = logger .Format .ITALICS )
292300
293301 # get last comparison value
294- logger .log ('get last comparison value' , format = logger .Format .ITALICS )
302+ logger .log ('Get last comparison value... ' , format = logger .Format .ITALICS )
295303 last_comparison_value = incremental_copy_status .get_last_comparison_value (
296304 self .node_path (), self .source_db_alias , self .source_table )
297- logger .log (repr ( last_comparison_value ) , format = logger .Format .VERBATIM )
305+ logger .log (f"Last max modification comparison value: { last_comparison_value !r } " , format = logger .Format .VERBATIM )
298306
299307 if target_table_is_empty or not last_comparison_value :
300308 # full load
301- logger .log ('full (non incremental) copy ' , logger .Format .ITALICS )
309+ logger .log ('Using full (non incremental) Copy ' , logger .Format .ITALICS )
302310 if not target_table_is_empty :
303311 truncate_query = f'TRUNCATE TABLE { self .target_table } '
304312 logger .log (truncate_query , format = logger .Format .VERBATIM )
@@ -309,7 +317,7 @@ def run(self) -> bool:
309317 # If the load crashed part-way (with some data already in the table), the next run would
310318 # not trigger a full load and we would miss data. To prevent that, delete the old
311319 # comparison value (we will then set it only on success)
312- logger .log ('deleting old comparison value' , logger .Format .ITALICS )
320+ logger .log ('Deleting old comparison value' , logger .Format .ITALICS )
313321 incremental_copy_status .delete (self .node_path (), self .source_db_alias , self .source_table )
314322
315323 # overwrite the comparison criteria to get everything
@@ -320,7 +328,7 @@ def run(self) -> bool:
320328
321329 else :
322330 # incremental load. First create the table which will contain the delta
323- logger .log ('incremental copy , create upsert table' , logger .Format .ITALICS )
331+ logger .log ('Using incremental Copy , create upsert table' , logger .Format .ITALICS )
324332 create_upsert_table_query = (f'DROP TABLE IF EXISTS { self .target_table } _upsert;\n '
325333 + f'CREATE TABLE { self .target_table } _upsert AS SELECT * from { self .target_table } WHERE FALSE' )
326334
0 commit comments