diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index c2a56afbc580e..cd2d3ddf93c2a 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -619,6 +619,7 @@ I/O
 ^^^
 - Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping`` elements. (:issue:`57915`)
 - Bug in :meth:`.DataFrame.to_json` when ``"index"`` was a value in the :attr:`DataFrame.column` and :attr:`Index.name` was ``None``. Now, this will fail with a ``ValueError`` (:issue:`58925`)
+- Bug in :meth:`DataFrame.from_records` where the ``columns`` parameter did not reorder or filter the fields of a NumPy structured array (:issue:`59717`)
 - Bug in :meth:`DataFrame.to_dict` raises unnecessary ``UserWarning`` when columns are not unique and ``orient='tight'``. (:issue:`58281`)
 - Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`)
 - Bug in :meth:`DataFrame.to_stata` when writing :class:`DataFrame` and ``byteorder=`big```. (:issue:`58969`)
diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index 07465e7b87fcd..959e572b2b35b 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -750,7 +750,9 @@ def to_arrays(
 
     elif isinstance(data, np.ndarray) and data.dtype.names is not None:
         # e.g. recarray
-        columns = Index(list(data.dtype.names))
+        if columns is None:
+            # No explicit selection: keep every field, in dtype order.
+            columns = Index(data.dtype.names)
         arrays = [data[k] for k in columns]
         return arrays, columns
 
diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py
index abc3aab1c1492..1d4a2c0075e3e 100644
--- a/pandas/tests/frame/constructors/test_from_records.py
+++ b/pandas/tests/frame/constructors/test_from_records.py
@@ -469,3 +469,26 @@ def test_from_records_empty2(self):
 
         alt = DataFrame(arr)
         tm.assert_frame_equal(alt, expected)
+
+    def test_from_records_structured_array(self):
+        # GH 59717: ``columns`` must reorder and filter structured-array fields
+        data = np.array(
+            [
+                ("John", 25, "New York", 50000),
+                ("Jane", 30, "San Francisco", 75000),
+                ("Bob", 35, "Chicago", 65000),
+                ("Alice", 28, "Los Angeles", 60000),
+            ],
+            dtype=[("name", "U10"), ("age", "i4"), ("city", "U15"), ("salary", "i4")],
+        )
+
+        result = DataFrame.from_records(data, columns=["name", "salary", "city"])
+
+        modified_data = {
+            "name": ["John", "Jane", "Bob", "Alice"],
+            "salary": np.array([50000, 75000, 65000, 60000], dtype="int32"),
+            "city": ["New York", "San Francisco", "Chicago", "Los Angeles"],
+        }
+        expected = DataFrame(modified_data)
+
+        tm.assert_frame_equal(result, expected)