Skip to content

Commit af9ccf6

Browse files
committed
Forcing read_parquet_metadata determinism #449
1 parent 98f5ead commit af9ccf6

File tree

2 files changed

+23
-1
lines changed

2 files changed

+23
-1
lines changed

awswrangler/_utils.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -238,6 +238,8 @@ def get_region_from_session(boto3_session: Optional[boto3.Session] = None, defau
238238

239239
def list_sampling(lst: List[Any], sampling: float) -> List[Any]:
240240
"""Random List sampling."""
241+
if sampling == 1.0:
242+
return lst
241243
if sampling > 1.0 or sampling <= 0.0:
242244
raise exceptions.InvalidArgumentValue(f"Argument <sampling> must be [0.0 < value <= 1.0]. {sampling} received.")
243245
_len: int = len(lst)
@@ -249,7 +251,9 @@ def list_sampling(lst: List[Any], sampling: float) -> List[Any]:
249251
_logger.debug("_len: %s", _len)
250252
_logger.debug("sampling: %s", sampling)
251253
_logger.debug("num_samples: %s", num_samples)
252-
return random.sample(population=lst, k=num_samples)
254+
random_lst: List[Any] = random.sample(population=lst, k=num_samples)
255+
random_lst.sort()
256+
return random_lst
253257

254258

255259
def ensure_df_is_mutable(df: pd.DataFrame) -> pd.DataFrame:

tests/test_glue.py

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,18 @@
1+
import logging
2+
3+
import pandas as pd
4+
5+
import awswrangler as wr
6+
7+
logging.getLogger("awswrangler").setLevel(logging.DEBUG)
8+
9+
10+
# Regression test: the key order of the first element returned by
# wr.s3.read_parquet_metadata must be identical across repeated calls, even
# when the dataset holds files written with different column orders.
# NOTE(review): `path` is presumably a pytest fixture supplying an S3 prefix —
# confirm against the test suite's conftest.
def test_parquet_crawler_columns(path):
11+
# First write: columns in the order c0, c1; "overwrite" mode.
df = pd.DataFrame({"c0": [0, 1], "c1": [2, 3]})
12+
wr.s3.to_parquet(df, path, dataset=True, mode="overwrite")
13+
# Second write: same data with the column order swapped (c1, c0), appended
# to the same path so the dataset mixes both orderings.
df = pd.DataFrame({"c1": [2, 3], "c0": [0, 1]})
14+
wr.s3.to_parquet(df, path, dataset=True, mode="append")
15+
# Baseline key order taken from element [0] of the metadata result
# (presumably the column-name -> type mapping; verify against the API docs).
first_schema = wr.s3.read_parquet_metadata(path=path)[0]
16+
# Re-read ten more times; every read must yield the baseline key order.
for _ in range(10):
17+
schema = wr.s3.read_parquet_metadata(path=path)[0]
18+
assert list(schema.keys()) == list(first_schema.keys())

0 commit comments

Comments
 (0)