Skip to content

Commit 4eadf30

Browse files
authored
Revert "fix: Ignore index for multi-file datasets in distributed mode (#2266)" (#2277)
This reverts commit 56a55a8.
1 parent b38779c commit 4eadf30

File tree

2 files changed

+1
-8
lines changed

2 files changed

+1
-8
lines changed

awswrangler/distributed/ray/modin/s3/_read_parquet.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -55,5 +55,5 @@ def _read_parquet_distributed( # pylint: disable=unused-argument
     return _to_modin(
         dataset=dataset,
         to_pandas_kwargs=arrow_kwargs,
-        ignore_index=bool(path_root),
+        ignore_index=arrow_kwargs.get("ignore_metadata"),
     )

tests/unit/test_s3_parquet.py

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -360,7 +360,6 @@ def test_parquet_with_size(path, use_threads, max_rows_by_file):
     assert df.iint8.sum() == df2.iint8.sum()
 
 
-@pytest.mark.xfail(is_ray_modin, raises=AssertionError, reason="Index equality regression")
 @pytest.mark.parametrize("use_threads", [True, False, 2])
 def test_index_and_timezone(path, use_threads):
     df = pd.DataFrame({"c0": [datetime.utcnow(), datetime.utcnow()], "par": ["a", "b"]}, index=["foo", "boo"])
@@ -397,7 +396,6 @@ def test_index_recovery_simple_str(path, use_threads):
     assert_pandas_equals(df, df2)
 
 
-@pytest.mark.xfail(is_ray_modin, raises=AssertionError, reason="Index equality regression")
 @pytest.mark.parametrize("use_threads", [True, False, 2])
 def test_index_recovery_partitioned_str(path, use_threads):
     df = pd.DataFrame(
@@ -625,11 +623,6 @@ def test_parquet_compression(path, compression) -> None:
     assert_pandas_equals(df, df2)
 
 
-@pytest.mark.xfail(
-    is_ray_modin,
-    raises=AssertionError,
-    reason="Dataframe indexes are not equal in distributed mode",
-)
 @pytest.mark.parametrize("use_threads", [True, False, 2])
 def test_empty_file(path, use_threads):
     df = pd.DataFrame({"c0": [1, 2, 3], "c1": [None, None, None], "par": ["a", "b", "c"]})

0 commit comments

Comments
 (0)