Skip to content

Commit 39b2773

Browse files
authored
Change apply_column_names default to false (#56)
* Change apply_column_names default to false
* Add deprecation message
* flake8 fix
* More flake8
* Update tests. Update docs for pandas parameter type.
* Use a different name to avoid conflicts in the global dict
* Missed changing the return
1 parent 289a59d commit 39b2773

File tree

3 files changed

+28
-16
lines changed

3 files changed

+28
-16
lines changed

inference_schema/parameter_types/pandas_parameter_type.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,23 @@
77
from .abstract_parameter_type import AbstractParameterType
88
from ._util import get_swagger_for_list, get_swagger_for_nested_dict
99
from ._constants import SWAGGER_FORMAT_CONSTANTS
10+
from warnings import warn
1011

1112

1213
class PandasParameterType(AbstractParameterType):
1314
"""
1415
Class used to specify an expected parameter as a Pandas type.
1516
"""
1617

17-
def __init__(self, sample_input, enforce_column_type=True, enforce_shape=True, apply_column_names=True,
18+
def __init__(self, sample_input, enforce_column_type=True, enforce_shape=True, apply_column_names=False,
1819
orient='records'):
1920
"""
20-
Construct the PandasParameterType object.
21+
Construct the PandasParameterType object. An important note regarding Pandas DataFrame handling: by default,
22+
Pandas supports integer type column names in a DataFrame. However, when using the built-in methods for
23+
converting a json object to a DataFrame, unless all of the columns are integers, they will all be converted
24+
to strings. This ParameterType uses the built-in methods for performing conversion, and as such
25+
`deserialize_input` has the same limitation. It is recommended to not use a mix of string and integer
26+
type column names in the provided sample, as this can lead to inconsistent/unexpected behavior.
2127
2228
:param sample_input: A sample input dataframe. This sample will be used as a basis for column types and array
2329
shape.
@@ -28,8 +34,10 @@ def __init__(self, sample_input, enforce_column_type=True, enforce_shape=True, a
2834
:param enforce_shape: Enforce that input shape must match that of the provided sample when `deserialize_input`
2935
is called.
3036
:type enforce_shape: bool
31-
:param apply_column_names: Apply column names from the provided sample onto the input when `deserialize_input`
32-
is called.
37+
:param apply_column_names: [DEPRECATED] Apply column names from the provided sample onto the input when
38+
`deserialize_input` is called. Disabled by default, as there is no guaranteed order for dictionary keys,
39+
so it's possible for names to be applied in the wrong order when `deserialize_input` is called. The
40+
parameter is deprecated, and will be removed in a future update.
3341
:type apply_column_names: bool
3442
:param orient: The Pandas orient to use when converting between a json object and a DataFrame. Possible orients
3543
are 'split', 'records', 'index', 'columns', 'values', or 'table'. More information about these orients can
@@ -42,6 +50,10 @@ def __init__(self, sample_input, enforce_column_type=True, enforce_shape=True, a
4250
super(PandasParameterType, self).__init__(sample_input)
4351
self.enforce_column_type = enforce_column_type
4452
self.enforce_shape = enforce_shape
53+
54+
if apply_column_names:
55+
warn('apply_column_names is a deprecated parameter and will be removed in a future update',
56+
DeprecationWarning, stacklevel=2)
4557
self.apply_column_names = apply_column_names
4658

4759
if orient not in ('split', 'records', 'index', 'columns', 'values', 'table'):

tests/conftest.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@ def pandas_sample_output():
5858

5959

6060
@pytest.fixture(scope="session")
61-
def pandas_sample_input_multi_type_column_labels():
62-
pandas_input_data = {'name': ['Sarah', 'John'], 1: ['WA', 'CA']}
61+
def pandas_sample_input_int_column_labels():
62+
pandas_input_data = {0: ['Sarah', 'John'], 1: ['WA', 'CA']}
6363
return pd.DataFrame(data=pandas_input_data)
6464

6565

@@ -121,22 +121,22 @@ def pandas_split_orient_func(param):
121121

122122

123123
@pytest.fixture(scope="session")
124-
def decorated_pandas_func_multi_type_column_labels(pandas_sample_input_multi_type_column_labels):
124+
def decorated_pandas_func_int_column_labels(pandas_sample_input_int_column_labels):
125125

126-
@input_schema('param', PandasParameterType(pandas_sample_input_multi_type_column_labels))
127-
def pandas_split_orient_func(param):
126+
@input_schema('param', PandasParameterType(pandas_sample_input_int_column_labels))
127+
def pandas_int_column_labels_func(param):
128128
"""
129129
130130
:param param:
131131
:type param: pd.DataFrame
132132
:return:
133133
:rtype: pd.DataFrame
134134
"""
135-
assert param["name"] is not None
135+
assert param[0] is not None
136136
assert param[1] is not None
137137
return param
138138

139-
return pandas_split_orient_func
139+
return pandas_int_column_labels_func
140140

141141

142142
@pytest.fixture(scope="session")

tests/test_pandas_parameter_type.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,11 @@ def test_pandas_timestamp_handling(self, decorated_pandas_datetime_func):
4646
result = decorated_pandas_datetime_func(**pandas_input)
4747
assert_frame_equal(result, datetime)
4848

49-
def test_pandas_multi_type_columns_labels_handling(self, decorated_pandas_func_multi_type_column_labels):
50-
pandas_input = {'name': ['Sarah', 'John'], 1: ['WA', 'CA']}
51-
result = decorated_pandas_func_multi_type_column_labels(pandas_input)
52-
expected_result = pd.DataFrame(pandas_input)
53-
assert_frame_equal(result, expected_result)
49+
def test_pandas_int_column_labels(self, decorated_pandas_func_int_column_labels,
50+
pandas_sample_input_int_column_labels):
51+
input = pandas_sample_input_int_column_labels.to_dict(orient='records')
52+
result = decorated_pandas_func_int_column_labels(input)
53+
assert_frame_equal(result, pandas_sample_input_int_column_labels)
5454

5555

5656
class TestNestedType(object):

0 commit comments

Comments
 (0)