Skip to content

Commit 3f626b8

Browse files
Merge branch 'main' into release-3.0.0
2 parents 78a3734 + 691a050 commit 3f626b8

File tree

11 files changed

+101
-21
lines changed

11 files changed

+101
-21
lines changed

.github/workflows/minimum-response-time.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,6 @@ jobs:
1414
with:
1515
exempt_user_list: "github-actions[bot]"
1616
exempt_labels: "help wanted"
17-
exempt_authors: "malachi-constant,jaidisido,kukushking,cnfait,dependabot[bot]"
17+
exempt_authors: "malachi-constant,jaidisido,kukushking,LeonLuttenberger,cnfait,dependabot[bot]"
1818
token: ${{secrets.GITHUB_TOKEN}}
1919
label: needs-triage

awswrangler/_config.py

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,8 @@ class _ConfigArg(NamedTuple):
4848
"lakeformation_endpoint_url": _ConfigArg(dtype=str, nullable=True, enforced=True, loaded=True),
4949
"dynamodb_endpoint_url": _ConfigArg(dtype=str, nullable=True, enforced=True, loaded=True),
5050
"secretsmanager_endpoint_url": _ConfigArg(dtype=str, nullable=True, enforced=True, loaded=True),
51-
"timestream_endpoint_url": _ConfigArg(dtype=str, nullable=True, enforced=True, loaded=True),
51+
"timestream_query_endpoint_url": _ConfigArg(dtype=str, nullable=True, enforced=True),
52+
"timestream_write_endpoint_url": _ConfigArg(dtype=str, nullable=True, enforced=True),
5253
# Botocore config
5354
"botocore_config": _ConfigArg(dtype=botocore.config.Config, nullable=True),
5455
"verify": _ConfigArg(dtype=str, nullable=True, loaded=True),
@@ -70,6 +71,20 @@ class _Config: # pylint: disable=too-many-instance-attributes,too-many-public-m
7071
def __init__(self) -> None:
7172
self._loaded_values: Dict[str, _ConfigValueType] = {}
7273
name: str
74+
self.s3_endpoint_url = None
75+
self.athena_endpoint_url = None
76+
self.sts_endpoint_url = None
77+
self.glue_endpoint_url = None
78+
self.redshift_endpoint_url = None
79+
self.kms_endpoint_url = None
80+
self.emr_endpoint_url = None
81+
self.lakeformation_endpoint_url = None
82+
self.dynamodb_endpoint_url = None
83+
self.secretsmanager_endpoint_url = None
84+
self.timestream_write_endpoint_url = None
85+
self.timestream_query_endpoint_url = None
86+
self.botocore_config = None
87+
self.verify = None
7388
for name in _CONFIG_ARGS:
7489
self._load_config(name=name)
7590

@@ -392,13 +407,22 @@ def secretsmanager_endpoint_url(self, value: Optional[str]) -> None:
392407
self._set_config_value(key="secretsmanager_endpoint_url", value=value)
393408

394409
@property
395-
def timestream_endpoint_url(self) -> Optional[str]:
396-
"""Property timestream_endpoint_url."""
397-
return cast(Optional[str], self["timestream_endpoint_url"])
410+
def timestream_query_endpoint_url(self) -> Optional[str]:
411+
"""Property timestream_query_endpoint_url."""
412+
return cast(Optional[str], self["timestream_query_endpoint_url"])
398413

399-
@timestream_endpoint_url.setter
400-
def timestream_endpoint_url(self, value: Optional[str]) -> None:
401-
self._set_config_value(key="timestream_endpoint_url", value=value)
414+
@timestream_query_endpoint_url.setter
415+
def timestream_query_endpoint_url(self, value: Optional[str]) -> None:
416+
self._set_config_value(key="timestream_query_endpoint_url", value=value)
417+
418+
@property
419+
def timestream_write_endpoint_url(self) -> Optional[str]:
420+
"""Property timestream_write_endpoint_url."""
421+
return cast(Optional[str], self["timestream_write_endpoint_url"])
422+
423+
@timestream_write_endpoint_url.setter
424+
def timestream_write_endpoint_url(self, value: Optional[str]) -> None:
425+
self._set_config_value(key="timestream_write_endpoint_url", value=value)
402426

403427
@property
404428
def botocore_config(self) -> botocore.config.Config:

awswrangler/_utils.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,11 @@ def _get_endpoint_url(service_name: str) -> Optional[str]:
102102
endpoint_url = _config.config.dynamodb_endpoint_url
103103
elif service_name == "secretsmanager" and _config.config.secretsmanager_endpoint_url is not None:
104104
endpoint_url = _config.config.secretsmanager_endpoint_url
105-
elif service_name == "timestream" and _config.config.timestream_endpoint_url is not None:
106-
endpoint_url = _config.config.timestream_endpoint_url
105+
elif service_name == "timestream-write" and _config.config.timestream_write_endpoint_url is not None:
106+
endpoint_url = _config.config.timestream_write_endpoint_url
107+
elif service_name == "timestream-query" and _config.config.timestream_query_endpoint_url is not None:
108+
endpoint_url = _config.config.timestream_query_endpoint_url
109+
107110
return endpoint_url
108111

109112

awswrangler/redshift.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ def _validate_parameters(
219219
distkey: Optional[str],
220220
sortstyle: str,
221221
sortkey: Optional[List[str]],
222+
primary_keys: Optional[List[str]],
222223
) -> None:
223224
if diststyle not in _RS_DISTSTYLES:
224225
raise exceptions.InvalidRedshiftDiststyle(f"diststyle must be in {_RS_DISTSTYLES}")
@@ -240,6 +241,14 @@ def _validate_parameters(
240241
raise exceptions.InvalidRedshiftSortkey(
241242
f"sortkey must be a List of items in the columns list: {cols}. " f"Currently value: {key}"
242243
)
244+
if primary_keys:
245+
if not isinstance(primary_keys, list):
246+
raise exceptions.InvalidArgumentType(
247+
f"""
248+
primary keys should be of type list[str].
249+
Current value: {primary_keys} is of type {type(primary_keys)}
250+
"""
251+
)
243252

244253

245254
def _redshift_types_from_path(
@@ -379,6 +388,7 @@ def _create_table( # pylint: disable=too-many-locals,too-many-arguments,too-man
379388
distkey=distkey,
380389
sortstyle=sortstyle,
381390
sortkey=sortkey,
391+
primary_keys=primary_keys,
382392
)
383393
cols_str: str = "".join([f'"{k}" {v},\n' for k, v in redshift_types.items()])[:-2]
384394
primary_keys_str: str = f",\nPRIMARY KEY ({', '.join(primary_keys)})" if primary_keys else ""

awswrangler/s3/_fs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -555,7 +555,7 @@ def open_s3_object(
555555
newline: Optional[str] = "\n",
556556
encoding: Optional[str] = "utf-8",
557557
) -> Iterator[Union[_S3ObjectBase, io.TextIOWrapper]]:
558-
"""Return a _S3Object or TextIOWrapper based in the received mode."""
558+
"""Return a _S3Object or TextIOWrapper based on the received mode."""
559559
s3obj: Optional[_S3ObjectBase] = None
560560
text_s3obj: Optional[io.TextIOWrapper] = None
561561
try:

awswrangler/s3/_read_text.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ def read_csv(
230230
The requested parallelism of the read. Only used when `distributed` add-on is installed.
231231
Parallelism may be limited by the number of files of the dataset. 200 by default.
232232
pandas_kwargs :
233-
KEYWORD arguments forwarded to pandas.read_csv(). You can NOT pass `pandas_kwargs` explicit, just add valid
233+
KEYWORD arguments forwarded to pandas.read_csv(). You can NOT pass `pandas_kwargs` explicitly, just add valid
234234
Pandas arguments in the function call and awswrangler will accept it.
235235
e.g. wr.s3.read_csv('s3://bucket/prefix/', sep='|', na_values=['null', 'none'], skip_blank_lines=True)
236236
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html
@@ -273,7 +273,7 @@ def read_csv(
273273
"""
274274
if "pandas_kwargs" in pandas_kwargs:
275275
raise exceptions.InvalidArgument(
276-
"You can NOT pass `pandas_kwargs` explicit, just add valid "
276+
"You can NOT pass `pandas_kwargs` explicitly, just add valid "
277277
"Pandas arguments in the function call and awswrangler will accept it."
278278
"e.g. wr.s3.read_csv('s3://bucket/prefix/', sep='|', skip_blank_lines=True)"
279279
)

awswrangler/s3/_read_text_core.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818
def _get_read_details(path: str, pandas_kwargs: Dict[str, Any]) -> Tuple[str, Optional[str], Optional[str]]:
1919
if pandas_kwargs.get("compression", "infer") == "infer":
2020
pandas_kwargs["compression"] = infer_compression(path, compression="infer")
21-
mode: str = "r" if pandas_kwargs.get("compression") is None else "rb"
21+
mode: str = (
22+
"r" if pandas_kwargs.get("compression") is None and pandas_kwargs.get("encoding_errors") != "ignore" else "rb"
23+
)
2224
encoding: Optional[str] = pandas_kwargs.get("encoding", "utf-8")
2325
newline: Optional[str] = pandas_kwargs.get("lineterminator", None)
2426
return mode, encoding, newline

poetry.lock

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/unit/test_config.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,10 @@ def wrapper(self, **kwarg):
3131
assert url == wr.config.glue_endpoint_url
3232
elif name == "secretsmanager":
3333
assert url == wr.config.secretsmanager_endpoint_url
34-
elif name == "timestream":
35-
assert url == wr.config.timestream_endpoint_url
34+
elif name == "timestream-write":
35+
assert url == wr.config.timestream_write_endpoint_url
36+
elif name == "timestream-query":
37+
assert url == wr.config.timestream_query_endpoint_url
3638
return original(self, **kwarg)
3739

3840
with patch("botocore.client.ClientCreator.create_client", new=wrapper):
@@ -117,14 +119,16 @@ def test_basics(path, glue_database, glue_table, workgroup0, workgroup1):
117119
wr.config.athena_endpoint_url = f"https://athena.{region}.amazonaws.com"
118120
wr.config.glue_endpoint_url = f"https://glue.{region}.amazonaws.com"
119121
wr.config.secretsmanager_endpoint_url = f"https://secretsmanager.{region}.amazonaws.com"
120-
wr.config.timestream_endpoint_url = f"https://timestream.{region}.amazonaws.com"
122+
wr.config.timestream_write_endpoint_url = f"https://ingest.timestream.{region}.amazonaws.com"
123+
wr.config.timestream_query_endpoint_url = f"https://query.timestream.{region}.amazonaws.com"
121124
_urls_test(glue_database)
122125
os.environ["WR_STS_ENDPOINT_URL"] = f"https://sts.{region}.amazonaws.com"
123126
os.environ["WR_S3_ENDPOINT_URL"] = f"https://s3.{region}.amazonaws.com"
124127
os.environ["WR_ATHENA_ENDPOINT_URL"] = f"https://athena.{region}.amazonaws.com"
125128
os.environ["WR_GLUE_ENDPOINT_URL"] = f"https://glue.{region}.amazonaws.com"
126129
os.environ["WR_SECRETSMANAGER_ENDPOINT_URL"] = f"https://secretsmanager.{region}.amazonaws.com"
127-
os.environ["WR_TIMESTREAM_ENDPOINT_URL"] = f"https://timestream.{region}.amazonaws.com"
130+
os.environ["WR_TIMESTREAM_WRITE_ENDPOINT_URL"] = f"https://ingest.timestream.{region}.amazonaws.com"
131+
os.environ["WR_TIMESTREAM_QUERY_ENDPOINT_URL"] = f"https://query.timestream.{region}.amazonaws.com"
128132
wr.config.reset()
129133
_urls_test(glue_database)
130134

tests/unit/test_s3_parquet.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -598,3 +598,21 @@ def test_read_parquet_versioned(path) -> None:
598598
df_temp = wr.s3.read_parquet(path_file, version_id=version_id)
599599
assert df_temp.equals(df)
600600
assert version_id == wr.s3.describe_objects(path=path_file, version_id=version_id)[path_file]["VersionId"]
601+
602+
603+
def test_read_parquet_schema_validation_with_index_column(path) -> None:
604+
path_file = f"{path}file.parquet"
605+
df = pd.DataFrame({"idx": [1], "col": [2]})
606+
df0 = df.set_index("idx")
607+
wr.s3.to_parquet(
608+
df=df0,
609+
path=path_file,
610+
index=True,
611+
)
612+
df1 = wr.s3.read_parquet(
613+
path=path_file,
614+
ignore_index=False,
615+
columns=["idx", "col"],
616+
validate_schema=True,
617+
)
618+
assert df0.shape == df1.shape

0 commit comments

Comments
 (0)