Skip to content

Commit 5d95ac8

Browse files
authored
Convert pandas JSON input to StringIO to avoid a bug (#67)
Works around a bug in pandas triggered by URLs in the JSON string.
1 parent c0d5042 commit 5d95ac8

File tree

3 files changed

+41
-2
lines changed

3 files changed

+41
-2
lines changed

inference_schema/parameter_types/pandas_parameter_type.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
from .abstract_parameter_type import AbstractParameterType
88
from ._util import get_swagger_for_list, get_swagger_for_nested_dict
99
from ._constants import SWAGGER_FORMAT_CONSTANTS
10+
from io import StringIO
1011
from warnings import warn
1112

1213

@@ -78,7 +79,8 @@ def deserialize_input(self, input_data):
7879
if not isinstance(input_data, list) and not isinstance(input_data, dict):
7980
raise Exception("Error, unable to convert input of type {} into Pandas Dataframe".format(type(input_data)))
8081

81-
data_frame = pd.read_json(json.dumps(input_data), orient=self.orient)
82+
string_stream = StringIO(json.dumps(input_data))
83+
data_frame = pd.read_json(string_stream, orient=self.orient)
8284

8385
if self.apply_column_names:
8486
data_frame.columns = self.sample_input.columns.copy()

tests/conftest.py

Lines changed: 23 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,12 @@ def pandas_sample_input_int_column_labels():
6363
return pd.DataFrame(data=pandas_input_data)
6464

6565

66+
@pytest.fixture(scope="session")
67+
def pandas_sample_input_with_url():
68+
pandas_input_data = {'state': ['WA'], 'website': ['http://wa.website.foo']}
69+
return pd.DataFrame(data=pandas_input_data)
70+
71+
6672
@pytest.fixture(scope="session")
6773
def decorated_pandas_func(pandas_sample_input, pandas_sample_output):
6874
@input_schema('param', PandasParameterType(pandas_sample_input))
@@ -122,7 +128,6 @@ def pandas_split_orient_func(param):
122128

123129
@pytest.fixture(scope="session")
124130
def decorated_pandas_func_int_column_labels(pandas_sample_input_int_column_labels):
125-
126131
@input_schema('param', PandasParameterType(pandas_sample_input_int_column_labels))
127132
def pandas_int_column_labels_func(param):
128133
"""
@@ -139,6 +144,23 @@ def pandas_int_column_labels_func(param):
139144
return pandas_int_column_labels_func
140145

141146

147+
@pytest.fixture(scope="session")
148+
def decorated_pandas_uri_func(pandas_sample_input_with_url):
149+
@input_schema('param', PandasParameterType(pandas_sample_input_with_url))
150+
def pandas_url_func(param):
151+
"""
152+
153+
:param param:
154+
:type param: pd.DataFrame
155+
:return:
156+
:rtype: string
157+
"""
158+
assert type(param) is pd.DataFrame
159+
return param['website'][0]
160+
161+
return pandas_url_func
162+
163+
142164
@pytest.fixture(scope="session")
143165
def decorated_spark_func():
144166
spark_session = SparkSession.builder.config('spark.driver.host', '127.0.0.1').getOrCreate()

tests/test_pandas_parameter_type.py

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,10 @@ def test_pandas_handling(self, decorated_pandas_func):
3131
assert '3.0' in version_list
3232
assert '3.1' in version_list
3333

34+
pandas_input = {'state': ['WA'], 'url': ['http://fakeurl.com']}
35+
result = decorated_pandas_func(pandas_input)
36+
assert_frame_equal(result, state)
37+
3438
def test_pandas_orient_handling(self, decorated_pandas_func_split_orient):
3539
pandas_input = {"columns": ["name", "state"], "index": [0], "data": [["Sarah", "WA"]]}
3640
state = pd.DataFrame(pd.read_json(json.dumps(pandas_input), orient='split')['state'])
@@ -52,6 +56,17 @@ def test_pandas_int_column_labels(self, decorated_pandas_func_int_column_labels,
5256
result = decorated_pandas_func_int_column_labels(input)
5357
assert_frame_equal(result, pandas_sample_input_int_column_labels)
5458

59+
def test_pandas_url_handling(self, decorated_pandas_uri_func):
60+
pandas_input = {'state': ['WA'], 'website': ['http://wa.website.foo']}
61+
website = pandas_input['website'][0]
62+
result = decorated_pandas_uri_func(pandas_input)
63+
assert website == result
64+
65+
pandas_input = {'state': ['WA'], 'website': ['This is an embedded url: http://wa.website.foo']}
66+
website = pandas_input['website'][0]
67+
result = decorated_pandas_uri_func(pandas_input)
68+
assert website == result
69+
5570

5671
class TestNestedType(object):
5772

0 commit comments

Comments
 (0)