11import duckdb
22import ibis
3+ import pandas as pd
34import pytest
5+ from kedro .io import DatasetError
46from packaging .version import Version
57from pandas .testing import assert_frame_equal
68
9+ from kedro_datasets import KedroDeprecationWarning
710from kedro_datasets .ibis import FileDataset , TableDataset
811
912_SENTINEL = object ()
@@ -37,13 +40,15 @@ def connection_config(request, database):
3740
@pytest.fixture
def table_dataset(database_name, connection_config, load_args, save_args):
    """Yield a ``TableDataset`` for the ``test`` table and drop it afterwards.

    The dataset is built from the ``database_name``, ``connection_config``,
    ``load_args`` and ``save_args`` fixtures, so tests can customise it
    through indirect parametrization of those fixtures.

    Yields:
        The configured ``TableDataset`` instance.
    """
    ds = TableDataset(
        table_name="test",
        database=database_name,
        connection=connection_config,
        load_args=load_args,
        save_args=save_args,
    )
    yield ds
    # Teardown: pick the backend's ``drop_<materialized>`` method so the object
    # written by the test does not leak into the next one.
    # NOTE(review): ``force=True`` presumably suppresses the error when the
    # test never created the object -- confirm against the backend API.
    getattr(ds._connection, f"drop_{ds._materialized}")("test", force=True)
4752
4853
4954@pytest .fixture
@@ -77,9 +82,10 @@ def test_save_and_load(self, table_dataset, dummy_table, database):
7782 assert "test" in con .sql ("SELECT * FROM duckdb_views" ).fetchnumpy ()["view_name" ]
7883
7984 @pytest .mark .parametrize (
80- "connection_config" , [{"backend" : "polars" }], indirect = True
85+ ("connection_config" , "save_args" ),
86+ [({"backend" : "polars" }, {"materialized" : "table" })],
87+ indirect = True ,
8188 )
82- @pytest .mark .parametrize ("save_args" , [{"materialized" : "table" }], indirect = True )
8389 def test_save_and_load_polars (
8490 self , table_dataset , connection_config , save_args , dummy_table
8591 ):
@@ -102,14 +108,147 @@ def test_exists(self, table_dataset, dummy_table):
102108 table_dataset .save (dummy_table )
103109 assert table_dataset .exists ()
104110
@pytest.mark.parametrize(
    "save_args", [{"materialized": "table", "mode": "append"}], indirect=True
)
def test_save_mode_append(self, table_dataset, dummy_table):
    """Two saves with mode=append should accumulate rows in the same table."""
    first_batch = second_batch = dummy_table

    for batch in (first_batch, second_batch):
        table_dataset.save(batch)

    # The reloaded table must hold the rows of both saved batches.
    expected_rows = len(first_batch.execute()) + len(second_batch.execute())
    reloaded = table_dataset.load().execute()
    assert len(reloaded) == expected_rows
126+
@pytest.mark.parametrize(
    "save_args",
    [
        {"materialized": "table", "mode": "error"},
        {"materialized": "table", "mode": "errorifexists"},
    ],
    indirect=True,
)
def test_save_mode_error_variants(self, table_dataset, dummy_table):
    """Both error and errorifexists must refuse to save over an existing table."""
    # The first save creates the table; the second one must be rejected.
    table_dataset.save(dummy_table)

    expected_message = 'Table with name "test" already exists'
    with pytest.raises(DatasetError, match=expected_message):
        table_dataset.save(dummy_table)
140+
@pytest.mark.parametrize(
    "save_args", [{"materialized": "table", "mode": "ignore"}], indirect=True
)
def test_save_mode_ignore(self, table_dataset, dummy_table):
    """A second save with mode=ignore must leave the existing table untouched."""
    initial = dummy_table
    repeated = dummy_table

    table_dataset.save(initial)
    table_dataset.save(repeated)

    # Only the contents of the first save are expected to be present.
    expected = initial.execute()
    actual = table_dataset.load().execute()
    assert_frame_equal(
        actual.reset_index(drop=True), expected.reset_index(drop=True)
    )
156+
def test_unsupported_save_mode_raises(self, database_name, connection_config):
    """Providing an unsupported save mode should raise a ValueError.

    The error is expected while the dataset is being constructed, before
    any save is attempted.
    """
    with pytest.raises(
        ValueError, match="'unsupported_mode' is not a valid SaveMode"
    ):
        TableDataset(
            table_name="unsupported_mode",
            database=database_name,
            connection=connection_config,
            save_args={"materialized": "table", "mode": "unsupported_mode"},
        )
168+
def test_legacy_overwrite_conflict_raises(self, database_name, connection_config):
    """Passing both ``mode`` and the legacy ``overwrite`` must raise ValueError."""
    conflicting_save_args = {
        "materialized": "table",
        "mode": "append",
        "overwrite": True,
    }

    with pytest.raises(ValueError):
        TableDataset(
            table_name="conflict",
            database=database_name,
            connection=connection_config,
            save_args=conflicting_save_args,
        )
182+
def test_legacy_overwrite_deprecation_warning(
    self, database_name, connection_config
):
    """Constructing a dataset with legacy ``overwrite`` must emit a deprecation warning."""
    legacy_save_args = {"overwrite": True}
    expected_warning = "'overwrite' is deprecated"

    with pytest.warns(KedroDeprecationWarning, match=expected_warning):
        TableDataset(
            table_name="deprecated_overwrite",
            database=database_name,
            connection=connection_config,
            save_args=legacy_save_args,
        )
194+
@pytest.mark.parametrize(
    ("connection_config", "save_args"),
    [({"backend": "polars"}, {"materialized": "table", "mode": "append"})],
    indirect=True,
)
def test_append_mode_no_insert_raises(self, table_dataset, dummy_table):
    """Appending on the polars backend must fail with a DatasetError.

    The first save creates the table; the second save is the actual append
    attempt and is expected to be rejected with a "does not support inserts"
    message.
    """
    table_dataset.save(dummy_table)

    expected_message = "does not support inserts"
    with pytest.raises(DatasetError, match=expected_message):
        table_dataset.save(dummy_table)
207+
@pytest.mark.parametrize(
    "save_args",
    [
        {"materialized": "table", "overwrite": True},
        {"materialized": "table", "overwrite": False},
    ],
    indirect=True,
)
def test_legacy_overwrite_behavior(self, table_dataset, save_args, dummy_table):
    """Legacy ``overwrite`` should either replace the table or raise on re-save."""
    replacement = ibis.memtable(
        pd.DataFrame({"col1": [7], "col2": [8], "col3": [9]})
    )

    # The initial save must succeed regardless of the overwrite flag.
    table_dataset.save(dummy_table)

    if not save_args["overwrite"]:
        # overwrite=False: saving onto the existing table is an error.
        with pytest.raises(DatasetError):
            table_dataset.save(replacement)
        return

    # overwrite=True: the table now holds only the replacement contents.
    table_dataset.save(replacement)
    expected = replacement.execute()
    actual = table_dataset.load().execute().reset_index(drop=True)
    assert_frame_equal(actual, expected.reset_index(drop=True))
232+
def test_describe_includes_backend_mode_and_materialized(self, table_dataset):
    """``_describe`` must report backend, mode, materialized and a top-level database."""
    description = table_dataset._describe()

    required_keys = {"backend", "mode", "materialized"}
    assert required_keys.issubset(description.keys())
    assert "database" in description

    # The database key lives at the top level only, not in the nested args.
    for nested_key in ("load_args", "save_args"):
        assert "database" not in description[nested_key]
243+
@pytest.mark.parametrize("load_args", [{"database": "test"}], indirect=True)
def test_load_extra_params(self, table_dataset, load_args):
    """Every supplied load argument should be stored on the dataset."""
    stored_args = table_dataset._load_args
    for name in load_args:
        assert stored_args[name] == load_args[name]
110249
111250 @pytest .mark .parametrize ("save_args" , [{"materialized" : "table" }], indirect = True )
112- def test_save_extra_params (self , table_dataset , save_args , dummy_table , database ):
251+ def test_save_extra_params (self , table_dataset , dummy_table , database ):
113252 """Test overriding the default save arguments."""
114253 table_dataset .save (dummy_table )
115254
0 commit comments