Skip to content

Commit a012f40

Browse files
committed
Move tests from pytest to original test file
1 parent 28f4051 commit a012f40

File tree

2 files changed: 82 additions (+82) and 109 deletions (-109)

pandas/tests/io/formats/test_to_csv.py

Lines changed: 1 addition & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -740,56 +740,4 @@ def test_to_csv_iterative_compression_buffer(compression):
740740
tm.assert_frame_equal(
741741
pd.read_csv(buffer, compression=compression, index_col=0), df
742742
)
743-
assert not buffer.closed
744-
745-
746-
def test_preserve_numpy_arrays_in_csv(self):
747-
df = pd.DataFrame({
748-
"id": [1, 2],
749-
"embedding": [
750-
np.array([0.1, 0.2, 0.3]),
751-
np.array([0.4, 0.5, 0.6])
752-
],
753-
})
754-
755-
with tm.ensure_clean("test.csv") as path:
756-
df.to_csv(path, index=False, preserve_complex=True)
757-
df_loaded = pd.read_csv(path, preserve_complex=True)
758-
759-
# Validate that embeddings are still NumPy arrays
760-
assert isinstance(df_loaded["embedding"][0], np.ndarray), (
761-
"Test Failed: The CSV did not preserve embeddings as NumPy arrays!"
762-
)
763-
764-
765-
def test_preserve_numpy_arrays_in_csv_empty_dataframe(self):
766-
df = pd.DataFrame({"embedding": []})
767-
expected = """\embedding"""
768-
769-
with tm.ensure_clean("test.csv") as path:
770-
df.to_csv(path, index=False, preserve_complex=True)
771-
with open(path, encoding="utf-8") as f:
772-
result = f.read()
773-
774-
assert result == expected, f"CSV output mismatch for empty DataFrame.\nGot:\n{result}"
775-
776-
777-
def test_preserve_numpy_arrays_in_csv_mixed_dtypes(self):
778-
df = pd.DataFrame({
779-
"id": [101, 102],
780-
"name": ["alice", "bob"],
781-
"scores": [np.array([95.5, 88.0]), np.array([76.0, 90.5])],
782-
"age": [25, 30],
783-
})
784-
785-
with tm.ensure_clean("test.csv") as path:
786-
df.to_csv(path, index=False, preserve_complex=True)
787-
df_loaded = pd.read_csv(path, preserve_complex=True)
788-
789-
assert isinstance(df_loaded["scores"][0], np.ndarray), (
790-
"Failed: 'scores' column not deserialized as np.ndarray."
791-
)
792-
793-
assert df_loaded["id"].dtype == np.int64, "Failed: 'id' should still be int."
794-
assert df_loaded["name"].dtype == object, "Failed: 'name' should still be string/object."
795-
assert df_loaded["age"].dtype == np.int64, "Failed: 'age' should still be int."
743+
assert not buffer.closed

scripts/tests/test_csv.py

Lines changed: 81 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -1,60 +1,85 @@
1+
import os
2+
import tempfile
3+
4+
import numpy as np
15
import pandas as pd
26

3-
print(pd.__file__)
4-
print(pd.__version__)
57

6-
import numpy as np
8+
def test_preserve_numpy_arrays_in_csv():
9+
print("\nRunning: test_preserve_numpy_arrays_in_csv")
10+
df = pd.DataFrame({
11+
"id": [1, 2],
12+
"embedding": [
13+
np.array([0.1, 0.2, 0.3]),
14+
np.array([0.4, 0.5, 0.6])
15+
],
16+
})
17+
18+
with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
19+
path = tmp.name
20+
21+
try:
22+
df.to_csv(path, index=False, preserve_complex=True)
23+
df_loaded = pd.read_csv(path, preserve_complex=True)
24+
assert isinstance(df_loaded["embedding"][0], np.ndarray), (
25+
"Test Failed: The CSV did not preserve embeddings as NumPy arrays!"
26+
)
27+
print("PASS: test_preserve_numpy_arrays_in_csv")
28+
finally:
29+
os.remove(path)
30+
31+
32+
def test_preserve_numpy_arrays_in_csv_empty_dataframe():
33+
print("\nRunning: test_preserve_numpy_arrays_in_csv_empty_dataframe")
34+
df = pd.DataFrame({"embedding": []})
35+
expected = "embedding\n"
36+
37+
with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
38+
path = tmp.name
39+
40+
try:
41+
df.to_csv(path, index=False, preserve_complex=True)
42+
with open(path, encoding="utf-8") as f:
43+
result = f.read()
44+
assert result == expected, (
45+
f"CSV output mismatch for empty DataFrame.\nGot:\n{result}\nExpected:\n{expected}"
46+
)
47+
print("PASS: test_preserve_numpy_arrays_in_csv_empty_dataframe")
48+
finally:
49+
os.remove(path)
50+
51+
52+
def test_preserve_numpy_arrays_in_csv_mixed_dtypes():
53+
print("\nRunning: test_preserve_numpy_arrays_in_csv_mixed_dtypes")
54+
df = pd.DataFrame({
55+
"id": [101, 102],
56+
"name": ["alice", "bob"],
57+
"scores": [
58+
np.array([95.5, 88.0]),
59+
np.array([76.0, 90.5])
60+
],
61+
"age": [25, 30],
62+
})
63+
64+
with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
65+
path = tmp.name
66+
67+
try:
68+
df.to_csv(path, index=False, preserve_complex=True)
69+
df_loaded = pd.read_csv(path, preserve_complex=True)
70+
assert isinstance(df_loaded["scores"][0], np.ndarray), (
71+
"Failed: 'scores' column not deserialized as np.ndarray."
72+
)
73+
assert df_loaded["id"].dtype == np.int64, "Failed: 'id' should still be int."
74+
assert df_loaded["name"].dtype == object, "Failed: 'name' should still be object/string."
75+
assert df_loaded["age"].dtype == np.int64, "Failed: 'age' should still be int."
76+
77+
print("PASS: test_preserve_numpy_arrays_in_csv_mixed_dtypes")
78+
finally:
79+
os.remove(path)
80+
781

8-
# # Create a DataFrame with NumPy arrays
9-
# df = pd.DataFrame({
10-
# 'id': [1, 2],
11-
# 'embedding': [np.array([0.1, 0.2, 0.3]), np.array([0.4, 0.5, 0.6])]
12-
# })
13-
14-
# # Save to CSV (where your custom preserve_complex logic resides)
15-
# csv_file = "test_numpy_array.csv"
16-
# df.to_csv(csv_file, index=False, preserve_complex=True)
17-
18-
# # Read back the raw CSV content (as text only)
19-
# with open(csv_file, "r") as f:
20-
# csv_content = f.read()
21-
22-
# print(f"Saved CSV:\n{csv_content}")
23-
24-
# # Simple test: check that our JSON-ified arrays are present in the CSV text
25-
# try:
26-
# assert "[0.1, 0.2, 0.3]" in csv_content
27-
# assert "[0.4, 0.5, 0.6]" in csv_content
28-
# print("\nTest Passed: The CSV output includes JSON-serialized arrays for 'embedding'.")
29-
# except AssertionError:
30-
# print("\nTest Failed: The CSV does not appear to have JSON-serialized arrays as expected!")
31-
# raise
32-
33-
34-
35-
# TEST2
36-
# Create a DataFrame with NumPy arrays
37-
df = pd.DataFrame({
38-
"id": [1, 2],
39-
"embedding": [np.array([0.1, 0.2, 0.3]), np.array([0.4, 0.5, 0.6])]
40-
})
41-
42-
# Save to CSV
43-
csv_file = "test_numpy_array.csv"
44-
df.to_csv(csv_file, index=False, preserve_complex=True)
45-
print(f"Saved CSV:\n{open(csv_file).read()}")
46-
47-
# Read back the CSV
48-
df_loaded = pd.read_csv(csv_file, preserve_complex=True)
49-
50-
# Print results
51-
print("\nLoaded DataFrame:")
52-
print(df_loaded)
53-
54-
# ✅ **Make the test fail by checking if we correctly load NumPy arrays**
55-
try:
56-
assert isinstance(df_loaded["embedding"][0], np.ndarray), "Test Failed: Embeddings were not preserved as NumPy arrays!"
57-
print("\nTest Passed: Embeddings were correctly preserved as NumPy arrays")
58-
except AssertionError as e:
59-
print("\nTest Failed: Pandas does not preserve NumPy arrays in CSV, needs improvement!")
60-
raise e
82+
if __name__ == "__main__":
83+
test_preserve_numpy_arrays_in_csv()
84+
test_preserve_numpy_arrays_in_csv_empty_dataframe()
85+
test_preserve_numpy_arrays_in_csv_mixed_dtypes()

0 commit comments

Comments
 (0)