Skip to content

Commit b0ad058

Browse files
authored
Merge pull request #76 from wamartin-aml/wamartin/stopInferringJSONtoDataFrame
Stop inferring data types when converting JSON to DataFrame
2 parents 0270fc4 + 94172d3 commit b0ad058

File tree

3 files changed

+35
-1
lines changed

3 files changed

+35
-1
lines changed

inference_schema/parameter_types/pandas_parameter_type.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,7 @@ def deserialize_input(self, input_data):
80 80
raise Exception("Error, unable to convert input of type {} into Pandas Dataframe".format(type(input_data)))
81 81

82 82
string_stream = StringIO(json.dumps(input_data))
83-
data_frame = pd.read_json(string_stream, orient=self.orient)
83+
data_frame = pd.read_json(string_stream, orient=self.orient, dtype=False)
84 84

85 85
if self.apply_column_names:
86 86
data_frame.columns = self.sample_input.columns.copy()

tests/conftest.py

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -164,6 +164,29 @@ def pandas_url_func(param):
164 164
return pandas_url_func
165 165

166 166

167+
@pytest.fixture(scope="session")
168+
def pandas_sample_input_with_categorical():
169+
pandas_input_data = {'state': ['characters'], 'cat': ['000']}
170+
return pd.DataFrame(data=pandas_input_data)
171+
172+
173+
@pytest.fixture(scope="session")
174+
def decorated_pandas_categorical_func(pandas_sample_input_with_categorical):
175+
@input_schema('param', PandasParameterType(pandas_sample_input_with_categorical))
176+
def pandas_categorical_func(param):
177+
"""
178+
179+
:param param:
180+
:type param: pd.DataFrame
181+
:return:
182+
:rtype: string
183+
"""
184+
assert type(param) is pd.DataFrame
185+
return param['cat'][0]
186+
187+
return pandas_categorical_func
188+
189+
167 190
@pytest.fixture(scope="session")
168 191
def decorated_spark_func():
169 192
spark_session = SparkSession.builder.config('spark.driver.host', '127.0.0.1').getOrCreate()

tests/test_pandas_parameter_type.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,17 @@ def test_pandas_url_handling(self, decorated_pandas_uri_func):
67 67
result = decorated_pandas_uri_func(pandas_input)
68 68
assert website == result
69 69

70+
def test_pandas_categorical_handling(self, decorated_pandas_categorical_func):
71+
pandas_input = {'state': ['WA'], 'cat': ['000']}
72+
categorical = pandas_input['cat'][0]
73+
result = decorated_pandas_categorical_func(pandas_input)
74+
assert categorical == result
75+
76+
pandas_input = {'state': ['WA'], 'cat': ['001']}
77+
categorical = pandas_input['cat'][0]
78+
result = decorated_pandas_categorical_func(pandas_input)
79+
assert categorical == result
80+
70 81

71 82
class TestNestedType(object):
72 83

0 commit comments

Comments
 (0)