Skip to content

Commit 3927f7e

Browse files
committed
Fix _apply_timezone bug during s3.read_parquet() #417
1 parent 8eb3160 commit 3927f7e

File tree

1 file changed, with 19 additions and 7 deletions.

awswrangler/s3/_read_parquet.py

Lines changed: 19 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -184,8 +184,14 @@ def _apply_index(df: pd.DataFrame, metadata: Dict[str, Any]) -> pd.DataFrame:
184184
if col["kind"] == "range":
185185
df.index = pd.RangeIndex(start=col["start"], stop=col["stop"], step=col["step"])
186186
ignore_index = False
187-
if col["name"] is not None and col["name"].startswith("__index_level_") is False:
188-
df.index.name = col["name"]
187+
col_name: Optional[str] = None
188+
if "name" in col and col["name"] is not None:
189+
col_name = str(col["name"])
190+
elif "field_name" in col and col["field_name"] is not None:
191+
col_name = str(col["field_name"])
192+
if col_name is not None and col_name.startswith("__index_level_") is False:
193+
df.index.name = col_name
194+
189195
df.index.names = [None if n is not None and n.startswith("__index_level_") else n for n in df.index.names]
190196

191197
with warnings.catch_warnings():
@@ -196,12 +202,18 @@ def _apply_index(df: pd.DataFrame, metadata: Dict[str, Any]) -> pd.DataFrame:
196202

197203
def _apply_timezone(df: pd.DataFrame, metadata: Dict[str, Any]) -> pd.DataFrame:
198204
for c in metadata["columns"]:
199-
if c["field_name"] in df.columns and c["pandas_type"] == "datetimetz":
205+
if "field_name" in c and c["field_name"] is not None:
206+
col_name = str(c["field_name"])
207+
elif "name" in c and c["name"] is not None:
208+
col_name = str(c["name"])
209+
else:
210+
continue
211+
if col_name in df.columns and c["pandas_type"] == "datetimetz":
200212
timezone: datetime.tzinfo = pa.lib.string_to_tzinfo(c["metadata"]["timezone"])
201-
_logger.debug("applying timezone (%s) on column %s", timezone, c["field_name"])
202-
if hasattr(df[c["field_name"]].dtype, "tz") is False:
203-
df[c["field_name"]] = df[c["field_name"]].dt.tz_localize(tz="UTC")
204-
df[c["field_name"]] = df[c["field_name"]].dt.tz_convert(tz=timezone)
213+
_logger.debug("applying timezone (%s) on column %s", timezone, col_name)
214+
if hasattr(df[col_name].dtype, "tz") is False:
215+
df[col_name] = df[col_name].dt.tz_localize(tz="UTC")
216+
df[col_name] = df[col_name].dt.tz_convert(tz=timezone)
205217
return df
206218

207219

0 commit comments

Comments (0)