Skip to content

Commit f24ab96

Browse files
committed
Add thousands and decimal params to Pandas.read_csv()
1 parent f0aaa0d commit f24ab96

File tree

5 files changed

+51
-15
lines changed

5 files changed

+51
-15
lines changed

awswrangler/pandas.py

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,8 @@ def read_csv(
5252
usecols=None,
5353
dtype=None,
5454
sep=",",
55+
thousands=None,
56+
decimal=".",
5557
lineterminator="\n",
5658
quotechar='"',
5759
quoting=csv.QUOTE_MINIMAL,
@@ -74,6 +76,8 @@ def read_csv(
7476
:param usecols: Same as pandas.read_csv()
7577
:param dtype: Same as pandas.read_csv()
7678
:param sep: Same as pandas.read_csv()
79+
:param thousands: Same as pandas.read_csv()
80+
:param decimal: Same as pandas.read_csv()
7781
:param lineterminator: Same as pandas.read_csv()
7882
:param quotechar: Same as pandas.read_csv()
7983
:param quoting: Same as pandas.read_csv()
@@ -98,6 +102,8 @@ def read_csv(
98102
usecols=usecols,
99103
dtype=dtype,
100104
sep=sep,
105+
thousands=thousands,
106+
decimal=decimal,
101107
lineterminator=lineterminator,
102108
quotechar=quotechar,
103109
quoting=quoting,
@@ -115,6 +121,8 @@ def read_csv(
115121
usecols=usecols,
116122
dtype=dtype,
117123
sep=sep,
124+
thousands=thousands,
125+
decimal=decimal,
118126
lineterminator=lineterminator,
119127
quotechar=quotechar,
120128
quoting=quoting,
@@ -136,6 +144,8 @@ def _read_csv_iterator(
136144
usecols=None,
137145
dtype=None,
138146
sep=",",
147+
thousands=None,
148+
decimal=".",
139149
lineterminator="\n",
140150
quotechar='"',
141151
quoting=csv.QUOTE_MINIMAL,
@@ -159,6 +169,8 @@ def _read_csv_iterator(
159169
:param usecols: Same as pandas.read_csv()
160170
:param dtype: Same as pandas.read_csv()
161171
:param sep: Same as pandas.read_csv()
172+
:param thousands: Same as pandas.read_csv()
173+
:param decimal: Same as pandas.read_csv()
162174
:param lineterminator: Same as pandas.read_csv()
163175
:param quotechar: Same as pandas.read_csv()
164176
:param quoting: Same as pandas.read_csv()
@@ -184,6 +196,8 @@ def _read_csv_iterator(
184196
usecols=usecols,
185197
dtype=dtype,
186198
sep=sep,
199+
thousands=thousands,
200+
decimal=decimal,
187201
lineterminator=lineterminator,
188202
quotechar=quotechar,
189203
quoting=quoting,
@@ -231,6 +245,8 @@ def _read_csv_iterator(
231245
names=names,
232246
usecols=usecols,
233247
sep=sep,
248+
thousands=thousands,
249+
decimal=decimal,
234250
quotechar=quotechar,
235251
quoting=quoting,
236252
escapechar=escapechar,
@@ -340,6 +356,8 @@ def _read_csv_once(
340356
usecols=None,
341357
dtype=None,
342358
sep=",",
359+
thousands=None,
360+
decimal=".",
343361
lineterminator="\n",
344362
quotechar='"',
345363
quoting=0,
@@ -362,6 +380,8 @@ def _read_csv_once(
362380
:param usecols: Same as pandas.read_csv()
363381
:param dtype: Same as pandas.read_csv()
364382
:param sep: Same as pandas.read_csv()
383+
:param thousands: Same as pandas.read_csv()
384+
:param decimal: Same as pandas.read_csv()
365385
:param lineterminator: Same as pandas.read_csv()
366386
:param quotechar: Same as pandas.read_csv()
367387
:param quoting: Same as pandas.read_csv()
@@ -381,6 +401,8 @@ def _read_csv_once(
381401
names=names,
382402
usecols=usecols,
383403
sep=sep,
404+
thousands=thousands,
405+
decimal=decimal,
384406
quotechar=quotechar,
385407
quoting=quoting,
386408
escapechar=escapechar,

requirements-dev.txt

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,10 @@
11
yapf~=0.28.0
2-
mypy~=0.730
2+
mypy~=0.740
33
flake8~=3.7.8
44
pytest-cov~=2.8.1
5-
cfn-lint~=0.23.3
6-
twine~=1.13.0
5+
cfn-lint~=0.24.4
6+
twine~=2.0.0
77
wheel~=0.33.6
8-
sphinx~=2.1.2
8+
sphinx~=2.2.0
99
pyspark~=2.4.4
1010
pyspark-stubs~=2.4.0

requirements.txt

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
1-
numpy~=1.17.2
2-
pandas~=0.25.1
1+
numpy~=1.17.3
2+
pandas~=0.25.2
33
pyarrow~=0.14.0
4-
botocore~=1.12.239
5-
boto3~=1.9.239
6-
s3fs~=0.3.4
4+
botocore~=1.12.253
5+
boto3~=1.9.253
6+
s3fs~=0.3.5
77
tenacity~=5.1.1
88
pg8000~=1.13.2

setup.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -21,12 +21,12 @@
2121
packages=find_packages(include=["awswrangler", "awswrangler.*"], exclude=["tests"]),
2222
python_requires=">=3.6",
2323
install_requires=[
24-
"numpy~=1.17.2",
25-
"pandas~=0.25.1",
24+
"numpy~=1.17.3",
25+
"pandas~=0.25.2",
2626
"pyarrow~=0.14.0",
27-
"botocore~=1.12.239",
28-
"boto3~=1.9.239",
29-
"s3fs~=0.3.4",
27+
"botocore~=1.12.253",
28+
"boto3~=1.9.253",
29+
"s3fs~=0.3.5",
3030
"tenacity~=5.1.1",
3131
"pg8000~=1.13.2",
3232
],

testing/test_awswrangler/test_pandas.py

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -130,6 +130,20 @@ def test_read_csv_iterator_usecols(session, bucket, sample, row_num):
130130
assert total_count == row_num
131131

132132

133+
def test_read_csv_thousands_and_decimal(session, bucket):
134+
text = "col1;col2\n1.000.000,00;2.000.000,00\n3.000.000,00;4.000.000,00"
135+
filename = "test_read_csv_thousands_and_decimal/sample.txt"
136+
boto3.resource("s3").Object(bucket, filename).put(Body=text)
137+
path = f"s3://{bucket}/{filename}"
138+
df = session.pandas.read_csv(path=path, sep=";", thousands=".", decimal=",")
139+
assert len(df.index) == 2
140+
assert len(df.columns) == 2
141+
assert df.iloc[0].col1 == 1_000_000
142+
assert df.iloc[0].col2 == 2_000_000
143+
assert df.iloc[1].col1 == 3_000_000
144+
assert df.iloc[1].col2 == 4_000_000
145+
146+
133147
@pytest.mark.parametrize(
134148
"mode, file_format, preserve_index, partition_cols, procs_cpu_bound, factor",
135149
[
@@ -437,7 +451,7 @@ def test_to_parquet_with_empty_dataframe(session, bucket, database):
437451
procs_cpu_bound=1)
438452

439453

440-
def test_read_log_query(session, loggroup, logstream):
454+
def test_read_log_query(session, loggroup):
441455
dataframe = session.pandas.read_log_query(
442456
log_group_names=[loggroup],
443457
query="fields @timestamp, @message | sort @timestamp desc | limit 5",

0 commit comments

Comments
 (0)