Move preserve_complex CSV tests from pandas/tests/io/formats/test_to_csv.py to scripts/tests/test_csv.py

Jaspvr · Jaspvr · commit a012f403c9d4 · 2025-03-28T14:59:51.000-07:00
diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
@@ -740,56 +740,4 @@ def test_to_csv_iterative_compression_buffer(compression):
         tm.assert_frame_equal(
             pd.read_csv(buffer, compression=compression, index_col=0), df
         )
-        assert not buffer.closed
-
-
-def test_preserve_numpy_arrays_in_csv(self):
-        df = pd.DataFrame({
-            "id": [1, 2],
-            "embedding": [
-                np.array([0.1, 0.2, 0.3]),
-                np.array([0.4, 0.5, 0.6])
-            ],
-        })
-
-        with tm.ensure_clean("test.csv") as path:
-            df.to_csv(path, index=False, preserve_complex=True)
-            df_loaded = pd.read_csv(path, preserve_complex=True)
-
-            # Validate that embeddings are still NumPy arrays
-            assert isinstance(df_loaded["embedding"][0], np.ndarray), (
-                "Test Failed: The CSV did not preserve embeddings as NumPy arrays!"
-            )
-
-
-def test_preserve_numpy_arrays_in_csv_empty_dataframe(self):
-        df = pd.DataFrame({"embedding": []})
-        expected = """\embedding"""
-
-        with tm.ensure_clean("test.csv") as path:
-            df.to_csv(path, index=False, preserve_complex=True)
-            with open(path, encoding="utf-8") as f:
-                result = f.read()
-
-        assert result == expected, f"CSV output mismatch for empty DataFrame.\nGot:\n{result}"
-
-
-def test_preserve_numpy_arrays_in_csv_mixed_dtypes(self):
-        df = pd.DataFrame({
-            "id": [101, 102],
-            "name": ["alice", "bob"],
-            "scores": [np.array([95.5, 88.0]), np.array([76.0, 90.5])],
-            "age": [25, 30],
-        })
-
-        with tm.ensure_clean("test.csv") as path:
-            df.to_csv(path, index=False, preserve_complex=True)
-            df_loaded = pd.read_csv(path, preserve_complex=True)
-
-        assert isinstance(df_loaded["scores"][0], np.ndarray), (
-            "Failed: 'scores' column not deserialized as np.ndarray."
-        )
-
-        assert df_loaded["id"].dtype == np.int64, "Failed: 'id' should still be int."
-        assert df_loaded["name"].dtype == object, "Failed: 'name' should still be string/object."
-        assert df_loaded["age"].dtype == np.int64, "Failed: 'age' should still be int."
+        assert not buffer.closed
diff --git a/scripts/tests/test_csv.py b/scripts/tests/test_csv.py
@@ -1,60 +1,85 @@
+import os
+import tempfile
+
+import numpy as np
 import pandas as pd
 
-print(pd.__file__)
-print(pd.__version__)
 
-import numpy as np
+def test_preserve_numpy_arrays_in_csv():
+    print("\nRunning: test_preserve_numpy_arrays_in_csv")
+    df = pd.DataFrame({
+        "id": [1, 2],
+        "embedding": [
+            np.array([0.1, 0.2, 0.3]),
+            np.array([0.4, 0.5, 0.6])
+        ],
+    })
+
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
+        path = tmp.name
+
+    try:
+        df.to_csv(path, index=False, preserve_complex=True)
+        df_loaded = pd.read_csv(path, preserve_complex=True)
+        assert isinstance(df_loaded["embedding"][0], np.ndarray), (
+            "Test Failed: The CSV did not preserve embeddings as NumPy arrays!"
+        )
+        print("PASS: test_preserve_numpy_arrays_in_csv")
+    finally:
+        os.remove(path)
+
+
+def test_preserve_numpy_arrays_in_csv_empty_dataframe():
+    print("\nRunning: test_preserve_numpy_arrays_in_csv_empty_dataframe")
+    df = pd.DataFrame({"embedding": []})
+    expected = "embedding\n"
+
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
+        path = tmp.name
+
+    try:
+        df.to_csv(path, index=False, preserve_complex=True)
+        with open(path, encoding="utf-8") as f:
+            result = f.read()
+        assert result == expected, (
+            f"CSV output mismatch for empty DataFrame.\nGot:\n{result}\nExpected:\n{expected}"
+        )
+        print("PASS: test_preserve_numpy_arrays_in_csv_empty_dataframe")
+    finally:
+        os.remove(path)
+
+
+def test_preserve_numpy_arrays_in_csv_mixed_dtypes():
+    print("\nRunning: test_preserve_numpy_arrays_in_csv_mixed_dtypes")
+    df = pd.DataFrame({
+        "id": [101, 102],
+        "name": ["alice", "bob"],
+        "scores": [
+            np.array([95.5, 88.0]),
+            np.array([76.0, 90.5])
+        ],
+        "age": [25, 30],
+    })
+
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
+        path = tmp.name
+
+    try:
+        df.to_csv(path, index=False, preserve_complex=True)
+        df_loaded = pd.read_csv(path, preserve_complex=True)
+        assert isinstance(df_loaded["scores"][0], np.ndarray), (
+            "Failed: 'scores' column not deserialized as np.ndarray."
+        )
+        assert df_loaded["id"].dtype == np.int64, "Failed: 'id' should still be int."
+        assert df_loaded["name"].dtype == object, "Failed: 'name' should still be object/string."
+        assert df_loaded["age"].dtype == np.int64, "Failed: 'age' should still be int."
+
+        print("PASS: test_preserve_numpy_arrays_in_csv_mixed_dtypes")
+    finally:
+        os.remove(path)
+
 
-# # Create a DataFrame with NumPy arrays
-# df = pd.DataFrame({
-#     'id': [1, 2],
-#     'embedding': [np.array([0.1, 0.2, 0.3]), np.array([0.4, 0.5, 0.6])]
-# })
-
-# # Save to CSV (where your custom preserve_complex logic resides)
-# csv_file = "test_numpy_array.csv"
-# df.to_csv(csv_file, index=False, preserve_complex=True)
-
-# # Read back the raw CSV content (as text only)
-# with open(csv_file, "r") as f:
-#     csv_content = f.read()
-
-# print(f"Saved CSV:\n{csv_content}")
-
-# # Simple test: check that our JSON-ified arrays are present in the CSV text
-# try:
-#     assert "[0.1, 0.2, 0.3]" in csv_content
-#     assert "[0.4, 0.5, 0.6]" in csv_content
-#     print("\nTest Passed: The CSV output includes JSON-serialized arrays for 'embedding'.")
-# except AssertionError:
-#     print("\nTest Failed: The CSV does not appear to have JSON-serialized arrays as expected!")
-#     raise
-
-
-
-# TEST2
-# Create a DataFrame with NumPy arrays
-df = pd.DataFrame({
-    "id": [1, 2],
-    "embedding": [np.array([0.1, 0.2, 0.3]), np.array([0.4, 0.5, 0.6])]
-})
-
-# Save to CSV
-csv_file = "test_numpy_array.csv"
-df.to_csv(csv_file, index=False, preserve_complex=True)
-print(f"Saved CSV:\n{open(csv_file).read()}")
-
-# Read back the CSV
-df_loaded = pd.read_csv(csv_file, preserve_complex=True)
-
-# Print results
-print("\nLoaded DataFrame:")
-print(df_loaded)
-
-# ✅ **Make the test fail by checking if we correctly load NumPy arrays**
-try:
-    assert isinstance(df_loaded["embedding"][0], np.ndarray), "Test Failed: Embeddings were not preserved as NumPy arrays!"
-    print("\nTest Passed: Embeddings were correctly preserved as NumPy arrays")
-except AssertionError as e:
-    print("\nTest Failed: Pandas does not preserve NumPy arrays in CSV, needs improvement!")
-    raise e
+if __name__ == "__main__":
+    test_preserve_numpy_arrays_in_csv()
+    test_preserve_numpy_arrays_in_csv_empty_dataframe()
+    test_preserve_numpy_arrays_in_csv_mixed_dtypes()