|
| 1 | +import os |
| 2 | +import tempfile |
| 3 | + |
| 4 | +import numpy as np |
1 | 5 | import pandas as pd
|
2 | 6 |
|
3 |
| -print(pd.__file__) |
4 |
| -print(pd.__version__) |
5 | 7 |
|
6 |
| -import numpy as np |
def test_preserve_numpy_arrays_in_csv():
    """Check that an ndarray-valued column comes back as ndarrays from CSV.

    A two-row frame with an ``embedding`` column of NumPy arrays is written
    with ``to_csv(..., preserve_complex=True)`` and read back with
    ``read_csv(..., preserve_complex=True)``. The test passes when the first
    loaded ``embedding`` value is an ``np.ndarray``.

    Raises:
        AssertionError: if the first loaded embedding is not an ndarray.
    """
    print("\nRunning: test_preserve_numpy_arrays_in_csv")
    vectors = [
        np.array([0.1, 0.2, 0.3]),
        np.array([0.4, 0.5, 0.6]),
    ]
    frame = pd.DataFrame({"id": [1, 2], "embedding": vectors})

    # Only the path is needed: create the file, then close the handle right
    # away so pandas can reopen it by name.
    handle = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
    csv_path = handle.name
    handle.close()

    try:
        frame.to_csv(csv_path, index=False, preserve_complex=True)
        restored = pd.read_csv(csv_path, preserve_complex=True)
        first_embedding = restored["embedding"][0]
        assert isinstance(first_embedding, np.ndarray), (
            "Test Failed: The CSV did not preserve embeddings as NumPy arrays!"
        )
        print("PASS: test_preserve_numpy_arrays_in_csv")
    finally:
        # delete=False means the file outlives the handle; remove it ourselves.
        os.remove(csv_path)
| 30 | + |
| 31 | + |
def test_preserve_numpy_arrays_in_csv_empty_dataframe():
    """Check the CSV text written for an empty single-column frame.

    A frame with one empty ``embedding`` column is written with
    ``to_csv(..., index=False, preserve_complex=True)``. The file is then
    read back as UTF-8 text and must consist of the header line only.

    Raises:
        AssertionError: if the file content differs from ``"embedding\\n"``.
    """
    print("\nRunning: test_preserve_numpy_arrays_in_csv_empty_dataframe")
    expected = "embedding\n"
    empty_frame = pd.DataFrame({"embedding": []})

    # Reserve a named file, closing the handle before pandas writes to it.
    handle = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
    path = handle.name
    handle.close()

    try:
        empty_frame.to_csv(path, index=False, preserve_complex=True)
        with open(path, encoding="utf-8") as written:
            result = written.read()
        assert result == expected, (
            f"CSV output mismatch for empty DataFrame.\nGot:\n{result}\nExpected:\n{expected}"
        )
        print("PASS: test_preserve_numpy_arrays_in_csv_empty_dataframe")
    finally:
        # The temp file was created with delete=False, so clean it up here.
        os.remove(path)
| 50 | + |
| 51 | + |
def test_preserve_numpy_arrays_in_csv_mixed_dtypes():
    """Round-trip a mixed-dtype frame through CSV with ``preserve_complex=True``.

    The frame holds integer columns (``id``, ``age``), a text column
    (``name``) and a column of NumPy arrays (``scores``). After writing with
    ``to_csv`` and reading back with ``read_csv`` (both passing
    ``preserve_complex=True``), the test requires that:

    * the first ``scores`` value is an ``np.ndarray``;
    * ``id`` and ``age`` have dtype ``int64``;
    * ``name`` is a string column.

    Raises:
        AssertionError: if any of the checks above fails.
    """
    print("\nRunning: test_preserve_numpy_arrays_in_csv_mixed_dtypes")
    df = pd.DataFrame(
        {
            "id": [101, 102],
            "name": ["alice", "bob"],
            "scores": [
                np.array([95.5, 88.0]),
                np.array([76.0, 90.5]),
            ],
            "age": [25, 30],
        }
    )

    # Only the path is needed; the handle is closed on leaving the ``with``
    # so pandas can reopen the file by name.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
        path = tmp.name

    try:
        df.to_csv(path, index=False, preserve_complex=True)
        df_loaded = pd.read_csv(path, preserve_complex=True)
        assert isinstance(df_loaded["scores"][0], np.ndarray), (
            "Failed: 'scores' column not deserialized as np.ndarray."
        )
        assert df_loaded["id"].dtype == np.int64, "Failed: 'id' should still be int."
        # Text columns load as ``object`` or as a dedicated string dtype
        # depending on the pandas version/options, so accept either instead
        # of pinning ``dtype == object``.
        assert pd.api.types.is_string_dtype(df_loaded["name"]), (
            "Failed: 'name' should still be object/string."
        )
        assert df_loaded["age"].dtype == np.int64, "Failed: 'age' should still be int."

        print("PASS: test_preserve_numpy_arrays_in_csv_mixed_dtypes")
    finally:
        # delete=False means the file outlives the handle; remove it ourselves.
        os.remove(path)
| 80 | + |
7 | 81 |
|
8 |
| -# # Create a DataFrame with NumPy arrays |
9 |
| -# df = pd.DataFrame({ |
10 |
| -# 'id': [1, 2], |
11 |
| -# 'embedding': [np.array([0.1, 0.2, 0.3]), np.array([0.4, 0.5, 0.6])] |
12 |
| -# }) |
13 |
| - |
14 |
| -# # Save to CSV (where your custom preserve_complex logic resides) |
15 |
| -# csv_file = "test_numpy_array.csv" |
16 |
| -# df.to_csv(csv_file, index=False, preserve_complex=True) |
17 |
| - |
18 |
| -# # Read back the raw CSV content (as text only) |
19 |
| -# with open(csv_file, "r") as f: |
20 |
| -# csv_content = f.read() |
21 |
| - |
22 |
| -# print(f"Saved CSV:\n{csv_content}") |
23 |
| - |
24 |
| -# # Simple test: check that our JSON-ified arrays are present in the CSV text |
25 |
| -# try: |
26 |
| -# assert "[0.1, 0.2, 0.3]" in csv_content |
27 |
| -# assert "[0.4, 0.5, 0.6]" in csv_content |
28 |
| -# print("\nTest Passed: The CSV output includes JSON-serialized arrays for 'embedding'.") |
29 |
| -# except AssertionError: |
30 |
| -# print("\nTest Failed: The CSV does not appear to have JSON-serialized arrays as expected!") |
31 |
| -# raise |
32 |
| - |
33 |
| - |
34 |
| - |
35 |
| -# TEST2 |
36 |
| -# Create a DataFrame with NumPy arrays |
37 |
| -df = pd.DataFrame({ |
38 |
| - "id": [1, 2], |
39 |
| - "embedding": [np.array([0.1, 0.2, 0.3]), np.array([0.4, 0.5, 0.6])] |
40 |
| -}) |
41 |
| - |
42 |
| -# Save to CSV |
43 |
| -csv_file = "test_numpy_array.csv" |
44 |
| -df.to_csv(csv_file, index=False, preserve_complex=True) |
45 |
| -print(f"Saved CSV:\n{open(csv_file).read()}") |
46 |
| - |
47 |
| -# Read back the CSV |
48 |
| -df_loaded = pd.read_csv(csv_file, preserve_complex=True) |
49 |
| - |
50 |
| -# Print results |
51 |
| -print("\nLoaded DataFrame:") |
52 |
| -print(df_loaded) |
53 |
| - |
54 |
| -# ✅ **Make the test fail by checking if we correctly load NumPy arrays** |
55 |
| -try: |
56 |
| - assert isinstance(df_loaded["embedding"][0], np.ndarray), "Test Failed: Embeddings were not preserved as NumPy arrays!" |
57 |
| - print("\nTest Passed: Embeddings were correctly preserved as NumPy arrays") |
58 |
| -except AssertionError as e: |
59 |
| - print("\nTest Failed: Pandas does not preserve NumPy arrays in CSV, needs improvement!") |
60 |
| - raise e |
if __name__ == "__main__":
    # Script entry point: run every check in declaration order. The first
    # failing assertion propagates and stops the run.
    for check in (
        test_preserve_numpy_arrays_in_csv,
        test_preserve_numpy_arrays_in_csv_empty_dataframe,
        test_preserve_numpy_arrays_in_csv_mixed_dtypes,
    ):
        check()
0 commit comments