From b5a6648cb928fee5fbac0580b6fe509e1251328b Mon Sep 17 00:00:00 2001 From: VibavariG Date: Sat, 14 Sep 2024 12:08:31 -0400 Subject: [PATCH 01/10] BUG: Fix columns param reorder issue - if columns!=None, use passed param (#59717) --- pandas/core/internals/construction.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 07465e7b87fcd..81f17bd10e612 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -750,7 +750,8 @@ def to_arrays( elif isinstance(data, np.ndarray) and data.dtype.names is not None: # e.g. recarray - columns = Index(list(data.dtype.names)) + if columns is None: + columns = Index(list(data.dtype.names)) arrays = [data[k] for k in columns] return arrays, columns From 500bb6f88f4066797a617a5a64ee47c1299e33ab Mon Sep 17 00:00:00 2001 From: VibavariG Date: Sat, 14 Sep 2024 15:15:56 -0400 Subject: [PATCH 02/10] Add tests for to_arrays() --- pandas/tests/frame/methods/test_to_arrays.py | 26 ++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 pandas/tests/frame/methods/test_to_arrays.py diff --git a/pandas/tests/frame/methods/test_to_arrays.py b/pandas/tests/frame/methods/test_to_arrays.py new file mode 100644 index 0000000000000..5108cac8feadd --- /dev/null +++ b/pandas/tests/frame/methods/test_to_arrays.py @@ -0,0 +1,26 @@ +from pandas.core.internals.construction import to_arrays +import numpy as np +from pandas.core.indexes.api import ensure_index +from numpy import array + +def test_to_arrays(): + # GH 59717 + data = np.array([ + ('John', 25, 'New York', 50000), + ('Jane', 30, 'San Francisco', 75000), + ('Bob', 35, 'Chicago', 65000), + ('Alice', 28, 'Los Angeles', 60000) + ], dtype=[('name', 'U10'), ('age', 'i4'), ('city', 'U15'), ('salary', 'i4')]) + + columns = ['name', 'salary', 'city'] + indexed_columns = ensure_index(columns) + + actual_arrays, actual_cols = to_arrays(data, 
indexed_columns) + expected_arrays = [array(['John', 'Jane', 'Bob', 'Alice'], dtype=' Date: Sun, 15 Sep 2024 13:42:51 -0400 Subject: [PATCH 03/10] Fix import order with isort --- pandas/tests/frame/methods/test_to_arrays.py | 40 ++++++++++++-------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/pandas/tests/frame/methods/test_to_arrays.py b/pandas/tests/frame/methods/test_to_arrays.py index 5108cac8feadd..3b9669424255e 100644 --- a/pandas/tests/frame/methods/test_to_arrays.py +++ b/pandas/tests/frame/methods/test_to_arrays.py @@ -1,26 +1,34 @@ -from pandas.core.internals.construction import to_arrays import numpy as np -from pandas.core.indexes.api import ensure_index from numpy import array +import pandas._testing as tm +from pandas.core.indexes.api import ensure_index +from pandas.core.internals.construction import to_arrays + + def test_to_arrays(): # GH 59717 - data = np.array([ - ('John', 25, 'New York', 50000), - ('Jane', 30, 'San Francisco', 75000), - ('Bob', 35, 'Chicago', 65000), - ('Alice', 28, 'Los Angeles', 60000) - ], dtype=[('name', 'U10'), ('age', 'i4'), ('city', 'U15'), ('salary', 'i4')]) + data = np.array( + [ + ("John", 25, "New York", 50000), + ("Jane", 30, "San Francisco", 75000), + ("Bob", 35, "Chicago", 65000), + ("Alice", 28, "Los Angeles", 60000), + ], + dtype=[("name", "U10"), ("age", "i4"), ("city", "U15"), ("salary", "i4")], + ) - columns = ['name', 'salary', 'city'] + columns = ["name", "salary", "city"] indexed_columns = ensure_index(columns) actual_arrays, actual_cols = to_arrays(data, indexed_columns) - expected_arrays = [array(['John', 'Jane', 'Bob', 'Alice'], dtype=' Date: Sun, 15 Sep 2024 13:53:48 -0400 Subject: [PATCH 04/10] fix sort --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 89a1c388b3ba1..baea47ef090ea 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -624,6 +624,7 @@ 
I/O - Bug in :meth:`DataFrame.to_stata` when writing :class:`DataFrame` and ``byteorder=`big```. (:issue:`58969`) - Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`) - Bug in :meth:`HDFStore.get` was failing to save data of dtype datetime64[s] correctly (:issue:`59004`) +- Bug in :meth:`from_records` where columns parameter with numpy array data was not reordeing and filtering out the columns (:issue:`59717`) - Bug in :meth:`read_csv` causing segmentation fault when ``encoding_errors`` is not a string. (:issue:`59059`) - Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`) - Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`) From 7a054fbb0df03f4426132540829355d744488933 Mon Sep 17 00:00:00 2001 From: VibavariG Date: Sun, 15 Sep 2024 15:09:05 -0400 Subject: [PATCH 05/10] Update datatype to int32 --- pandas/tests/frame/methods/test_to_arrays.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_to_arrays.py b/pandas/tests/frame/methods/test_to_arrays.py index 3b9669424255e..6470031cbcfff 100644 --- a/pandas/tests/frame/methods/test_to_arrays.py +++ b/pandas/tests/frame/methods/test_to_arrays.py @@ -24,7 +24,7 @@ def test_to_arrays(): actual_arrays, actual_cols = to_arrays(data, indexed_columns) expected_arrays = [ array(["John", "Jane", "Bob", "Alice"], dtype=" Date: Sun, 15 Sep 2024 16:06:54 -0400 Subject: [PATCH 06/10] Fix test --- pandas/tests/series/indexing/test_setitem.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 71ba2dab671ef..156fdcba0402a 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ 
-1446,7 +1446,7 @@ def obj(self): marks=pytest.mark.xfail( ( not np_version_gte1p24 - or (np_version_gte1p24 and np._get_promotion_state() != "weak") + # or (np_version_gte1p24 and np._get_promotion_state() != "weak") ), reason="np.float32(1.1) ends up as 1.100000023841858, so " "np_can_hold_element raises and we cast to float64", From b9e7af2a67fedc26692a57cf6122092c93380a88 Mon Sep 17 00:00:00 2001 From: VibavariG Date: Sun, 15 Sep 2024 18:29:49 -0400 Subject: [PATCH 07/10] Revert commit --- pandas/tests/series/indexing/test_setitem.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 156fdcba0402a..71ba2dab671ef 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -1446,7 +1446,7 @@ def obj(self): marks=pytest.mark.xfail( ( not np_version_gte1p24 - # or (np_version_gte1p24 and np._get_promotion_state() != "weak") + or (np_version_gte1p24 and np._get_promotion_state() != "weak") ), reason="np.float32(1.1) ends up as 1.100000023841858, so " "np_can_hold_element raises and we cast to float64", From 35261c0f18cd933d2b19c1eb8f8d2c76de55f915 Mon Sep 17 00:00:00 2001 From: VibavariG Date: Tue, 17 Sep 2024 20:41:48 -0400 Subject: [PATCH 08/10] Add test for DataFrame.from_records() --- .../frame/constructors/test_from_records.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py index abc3aab1c1492..1d4a2c0075e3e 100644 --- a/pandas/tests/frame/constructors/test_from_records.py +++ b/pandas/tests/frame/constructors/test_from_records.py @@ -469,3 +469,26 @@ def test_from_records_empty2(self): alt = DataFrame(arr) tm.assert_frame_equal(alt, expected) + + def test_from_records_structured_array(self): + # GH 59717 + data = np.array( + [ + ("John", 25, "New York", 50000), 
("Jane", 30, "San Francisco", 75000), + ("Bob", 35, "Chicago", 65000), + ("Alice", 28, "Los Angeles", 60000), + ], + dtype=[("name", "U10"), ("age", "i4"), ("city", "U15"), ("salary", "i4")], + ) + + actual_result = DataFrame.from_records(data, columns=["name", "salary", "city"]) + + modified_data = { + "name": ["John", "Jane", "Bob", "Alice"], + "salary": np.array([50000, 75000, 65000, 60000], dtype="int32"), + "city": ["New York", "San Francisco", "Chicago", "Los Angeles"], + } + expected_result = DataFrame(modified_data) + + tm.assert_frame_equal(actual_result, expected_result) From 566ca76e3e31828544cdd58feddef6c0e9fe91d1 Mon Sep 17 00:00:00 2001 From: VibavariG Date: Tue, 17 Sep 2024 20:42:31 -0400 Subject: [PATCH 09/10] Apply comments --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/core/internals/construction.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index baea47ef090ea..279c1113c1b43 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -619,12 +619,12 @@ I/O ^^^ - Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping`` elements. (:issue:`57915`) - Bug in :meth:`.DataFrame.to_json` when ``"index"`` was a value in the :attr:`DataFrame.column` and :attr:`Index.name` was ``None``. Now, this will fail with a ``ValueError`` (:issue:`58925`) +- Bug in :meth:`DataFrame.from_records` where ``columns`` parameter with numpy structured array was not reordering and filtering out the columns (:issue:`59717`) - Bug in :meth:`DataFrame.to_dict` raises unnecessary ``UserWarning`` when columns are not unique and ``orient='tight'``. (:issue:`58281`) - Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`) - Bug in :meth:`DataFrame.to_stata` when writing :class:`DataFrame` and ``byteorder=`big```. 
(:issue:`58969`) - Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`) - Bug in :meth:`HDFStore.get` was failing to save data of dtype datetime64[s] correctly (:issue:`59004`) -- Bug in :meth:`from_records` where columns parameter with numpy array data was not reordeing and filtering out the columns (:issue:`59717`) - Bug in :meth:`read_csv` causing segmentation fault when ``encoding_errors`` is not a string. (:issue:`59059`) - Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`) - Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 81f17bd10e612..959e572b2b35b 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -751,7 +751,7 @@ def to_arrays( elif isinstance(data, np.ndarray) and data.dtype.names is not None: # e.g. 
recarray if columns is None: - columns = Index(list(data.dtype.names)) + columns = Index(data.dtype.names) arrays = [data[k] for k in columns] return arrays, columns From 3eddd981c29916d953ce9d895402c2b0f109b0e4 Mon Sep 17 00:00:00 2001 From: VibavariG Date: Wed, 18 Sep 2024 19:44:24 -0400 Subject: [PATCH 10/10] Delete test_to_arrays.py --- pandas/tests/frame/methods/test_to_arrays.py | 34 -------------------- 1 file changed, 34 deletions(-) delete mode 100644 pandas/tests/frame/methods/test_to_arrays.py diff --git a/pandas/tests/frame/methods/test_to_arrays.py b/pandas/tests/frame/methods/test_to_arrays.py deleted file mode 100644 index 6470031cbcfff..0000000000000 --- a/pandas/tests/frame/methods/test_to_arrays.py +++ /dev/null @@ -1,34 +0,0 @@ -import numpy as np -from numpy import array - -import pandas._testing as tm -from pandas.core.indexes.api import ensure_index -from pandas.core.internals.construction import to_arrays - - -def test_to_arrays(): - # GH 59717 - data = np.array( - [ - ("John", 25, "New York", 50000), - ("Jane", 30, "San Francisco", 75000), - ("Bob", 35, "Chicago", 65000), - ("Alice", 28, "Los Angeles", 60000), - ], - dtype=[("name", "U10"), ("age", "i4"), ("city", "U15"), ("salary", "i4")], - ) - - columns = ["name", "salary", "city"] - indexed_columns = ensure_index(columns) - - actual_arrays, actual_cols = to_arrays(data, indexed_columns) - expected_arrays = [ - array(["John", "Jane", "Bob", "Alice"], dtype="