@@ -741,3 +741,55 @@ def test_to_csv_iterative_compression_buffer(compression):
741
741
pd .read_csv (buffer , compression = compression , index_col = 0 ), df
742
742
)
743
743
assert not buffer .closed
744
+
745
+
746
+ def test_preserve_numpy_arrays_in_csv (self ):
747
+ df = pd .DataFrame ({
748
+ "id" : [1 , 2 ],
749
+ "embedding" : [
750
+ np .array ([0.1 , 0.2 , 0.3 ]),
751
+ np .array ([0.4 , 0.5 , 0.6 ])
752
+ ],
753
+ })
754
+
755
+ with tm .ensure_clean ("test.csv" ) as path :
756
+ df .to_csv (path , index = False , preserve_complex = True )
757
+ df_loaded = pd .read_csv (path , preserve_complex = True )
758
+
759
+ # Validate that embeddings are still NumPy arrays
760
+ assert isinstance (df_loaded ["embedding" ][0 ], np .ndarray ), (
761
+ "Test Failed: The CSV did not preserve embeddings as NumPy arrays!"
762
+ )
763
+
764
+
765
+ def test_preserve_numpy_arrays_in_csv_empty_dataframe (self ):
766
+ df = pd .DataFrame ({"embedding" : []})
767
+ expected = """\embedding"""
768
+
769
+ with tm .ensure_clean ("test.csv" ) as path :
770
+ df .to_csv (path , index = False , preserve_complex = True )
771
+ with open (path , encoding = "utf-8" ) as f :
772
+ result = f .read ()
773
+
774
+ assert result == expected , f"CSV output mismatch for empty DataFrame.\n Got:\n { result } "
775
+
776
+
777
+ def test_preserve_numpy_arrays_in_csv_mixed_dtypes (self ):
778
+ df = pd .DataFrame ({
779
+ "id" : [101 , 102 ],
780
+ "name" : ["alice" , "bob" ],
781
+ "scores" : [np .array ([95.5 , 88.0 ]), np .array ([76.0 , 90.5 ])],
782
+ "age" : [25 , 30 ],
783
+ })
784
+
785
+ with tm .ensure_clean ("test.csv" ) as path :
786
+ df .to_csv (path , index = False , preserve_complex = True )
787
+ df_loaded = pd .read_csv (path , preserve_complex = True )
788
+
789
+ assert isinstance (df_loaded ["scores" ][0 ], np .ndarray ), (
790
+ "Failed: 'scores' column not deserialized as np.ndarray."
791
+ )
792
+
793
+ assert df_loaded ["id" ].dtype == np .int64 , "Failed: 'id' should still be int."
794
+ assert df_loaded ["name" ].dtype == object , "Failed: 'name' should still be string/object."
795
+ assert df_loaded ["age" ].dtype == np .int64 , "Failed: 'age' should still be int."
0 commit comments