better handling of input types

jlwalke2 · jlwalke2 · commit 512cf2129875 · 2019-08-19T16:26:11.000-04:00
diff --git a/src/sasctl/_services/microanalytic_score.py b/src/sasctl/_services/microanalytic_score.py
@@ -9,6 +9,8 @@
 import re
 from collections import OrderedDict
 
+import six
+
 from .service import Service
 
 
@@ -116,7 +118,18 @@ def execute_module_step(self, module, step, return_dict=True, **kwargs):
         module = module.id
         step = step.id if hasattr(step, 'id') else step
 
-        body = {'inputs': [{'name': k, 'value': v} for k, v in kwargs.items()]}
+        # Make sure all inputs are JSON serializable.  Common types such as
+        # numpy.int64 and numpy.float64 are NOT, so cast them to int/float.
+        for k in kwargs.keys():
+            type_name = type(kwargs[k]).__name__
+            if type_name == 'float64':
+                kwargs[k] = float(kwargs[k])
+            elif type_name == 'int64':
+                kwargs[k] = int(kwargs[k])
+
+
+        body = {'inputs': [{'name': k, 'value': v}
+                           for k, v in six.iteritems(kwargs)]}
         r = self.post('/modules/{}/steps/{}'.format(module, step), json=body)
 
         # Convert list of name/value pair dictionaries to single dict
diff --git a/src/sasctl/tasks.py b/src/sasctl/tasks.py
@@ -88,7 +88,13 @@ def register_model(model, name, project, repository=None, input=None,
     repository : str or dict, optional
         The name or id of the repository, or a dictionary representation of
         the repository.  If omitted, the default repository will be used.
-    input
+    input : DataFrame, type, list of type, or dict of str: type, optional
+        The expected type for each input value of the target function.
+        Can be omitted if target function includes type hints.  If a DataFrame
+        is provided, the columns will be inspected to determine type information.
+        If a single type is provided, all columns will be assumed to be that type;
+        otherwise, a list of column types or a dictionary of column_name: type
+        may be provided.
     version : {'new', 'latest', int}, optional
         Version number of the project in which the model should be created.
         Defaults to 'new'.
diff --git a/src/sasctl/utils/pymas/core.py b/src/sasctl/utils/pymas/core.py
@@ -200,9 +200,13 @@ def from_pickle(file, func_name=None, input_types=None, array_input=False,
         object, and bytes is assumed to be the raw pickled bytes.
     func_name : str
         Name of the target function to call
-    input_types : list of type, optional
+    input_types : DataFrame, type, list of type, or dict of str: type, optional
         The expected type for each input value of the target function.
-        Can be ommitted if target function includes type hints.
+        Can be omitted if target function includes type hints.  If a DataFrame
+        is provided, the columns will be inspected to determine type information.
+        If a single type is provided, all columns will be assumed to be that type;
+        otherwise, a list of column types or a dictionary of column_name: type
+        may be provided.
     array_input : bool
         Whether the function inputs should be treated as an array instead of
         individual parameters
@@ -271,7 +275,8 @@ def _build_pymas(obj, func_name=None, input_types=None, array_input=False,
 
         # Run one observation through the model and use the result to
         # determine output variables
-        output = target_func(input_types.iloc[0, :].values.reshape((1, -1)))
+        output = target_func(input_types.head(1))
+        # output = target_func(input_types.iloc[0, :].values.reshape((1, -1)))
         output_vars = ds2_variables(output, output_vars=True)
         vars.extend(output_vars)
     elif isinstance(input_types, type):
diff --git a/src/sasctl/utils/pymas/python.py b/src/sasctl/utils/pymas/python.py
@@ -51,8 +51,14 @@ def ds2_variables(input, output_vars=False):
         types = input
     elif hasattr(input, 'columns') and hasattr(input, 'dtypes'):
         # Pandas DataFrame
-        types = OrderedDict([(col, (input[col].dtype.name.replace('object', 'char'), False)) for col in input.columns])
-        # types = {col: (input[col].dtype.name.replace('object', 'char'), False) for col in input.columns}
+        types = OrderedDict()
+        for col in input.columns:
+            if input[col].dtype.name == 'object':
+                types[col] = ('char', False)
+            elif input[col].dtype.name == 'category':
+                types[col] = ('char', False)
+            else:
+                types[col] = (input[col].dtype.name, False)
     elif hasattr(input, 'dtype'):
         # Numpy array?  No column names, but we can at least create dummy vars of the correct type
         types = OrderedDict([('var{}'.format(i),