@@ -1178,6 +1178,45 @@ def test_dataframe_csv_chunked(self):
11781178 df_idx_res = A .query (coords = False ).df [int (ned [0 ]) : int (ned [1 ])]
11791179 tm .assert_frame_equal (df_idx_res , df .reset_index (drop = True ))
11801180
def test_dataframe_csv_chunked_sparse_row_indices(self):
    """Check row indices of a sparse array ingested from CSV in chunks.

    Regression test for issue TileDB-Py#2278, where row_start_idx was
    being applied twice when using chunksize with sparse arrays, causing
    indices to jump.

    The same 9-row CSV is ingested twice, once in a single pass and once
    with ``chunksize=3``, and the two resulting dataframes are compared.
    """

    def read_all(uri):
        # Read the entire array at `uri` back as a dataframe.
        with tiledb.open(uri) as arr:
            return arr.df[:]

    # Source data: one column, nine rows.
    source_df = pd.DataFrame({"char": list("foobarbaz")})

    base_dir = self.path("csv_chunked_sparse_indices")
    self.vfs.create_dir(base_dir)
    csv_uri = os.path.join(base_dir, "source.csv")

    with tiledb.FileIO(self.vfs, csv_uri, "wb") as csv_out:
        source_df.to_csv(csv_out, index=False)

    # Baseline: ingest the whole CSV in one pass.
    unchunked_uri = os.path.join(base_dir, "unchunked")
    tiledb.from_csv(unchunked_uri, csv_file=csv_uri, sparse=True)

    # Under test: ingest the same CSV three rows at a time.
    chunked_uri = os.path.join(base_dir, "chunked")
    tiledb.from_csv(chunked_uri, csv_file=csv_uri, sparse=True, chunksize=3)

    baseline_df = read_all(unchunked_uri)
    chunked_df = read_all(chunked_uri)

    # Each ingestion path must yield the contiguous index 0..8.
    expected_rows = list(range(len(source_df)))
    self.assertEqual(list(baseline_df.index), expected_rows)
    self.assertEqual(list(chunked_df.index), expected_rows)

    # Chunking must not change the resulting dataframe in any way.
    tm.assert_frame_equal(chunked_df, baseline_df)
1219+
11811220 def test_csv_fillna (self ):
11821221 if pytest .tiledb_vfs == "s3" :
11831222 pytest .skip (
0 commit comments