|
28 | 28 | def _get_read_details(path: str, pandas_kwargs: Dict[str, Any]) -> Tuple[str, Optional[str], Optional[str]]: |
29 | 29 | if pandas_kwargs.get("compression", "infer") == "infer": |
30 | 30 | pandas_kwargs["compression"] = infer_compression(path, compression="infer") |
31 | | - mode: str = "r" if pandas_kwargs.get("compression") is None else "rb" |
| 31 | + mode: str = ( |
| 32 | + "r" if pandas_kwargs.get("compression") is None and pandas_kwargs.get("encoding_errors") != "ignore" else "rb" |
| 33 | + ) |
32 | 34 | encoding: Optional[str] = pandas_kwargs.get("encoding", "utf-8") |
33 | 35 | newline: Optional[str] = pandas_kwargs.get("lineterminator", None) |
34 | 36 | return mode, encoding, newline |
@@ -249,7 +251,7 @@ def read_csv( |
249 | 251 | E.g ``lambda x: True if x["year"] == "2020" and x["month"] == "1" else False`` |
250 | 252 | https://aws-sdk-pandas.readthedocs.io/en/2.17.0/tutorials/023%20-%20Flexible%20Partitions%20Filter.html |
251 | 253 | pandas_kwargs : |
252 | | - KEYWORD arguments forwarded to pandas.read_csv(). You can NOT pass `pandas_kwargs` explicit, just add valid |
| 254 | + KEYWORD arguments forwarded to pandas.read_csv(). You can NOT pass `pandas_kwargs` explicitly, just add valid |
253 | 255 | Pandas arguments in the function call and awswrangler will accept it. |
254 | 256 | e.g. wr.s3.read_csv('s3://bucket/prefix/', sep='|', na_values=['null', 'none'], skip_blank_lines=True) |
255 | 257 | https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html |
@@ -292,7 +294,7 @@ def read_csv( |
292 | 294 | """ |
293 | 295 | if "pandas_kwargs" in pandas_kwargs: |
294 | 296 | raise exceptions.InvalidArgument( |
295 | | - "You can NOT pass `pandas_kwargs` explicit, just add valid " |
| 297 | + "You can NOT pass `pandas_kwargs` explicitly, just add valid " |
296 | 298 | "Pandas arguments in the function call and awswrangler will accept it." |
297 | 299 | "e.g. wr.s3.read_csv('s3://bucket/prefix/', sep='|', skip_blank_lines=True)" |
298 | 300 | ) |
|
0 commit comments