@@ -398,9 +398,25 @@ def decode_arff(fh):
398398 def _convert_array_format (data , array_format , attribute_names ):
399399 """Convert a dataset to a given array format.
400400
401- By default, the data are stored as a sparse matrix or a pandas
402- dataframe. One might be interested to get a pandas SparseDataFrame or a
403- NumPy array instead, respectively.
401+ Converts non-sparse data to a numpy array if array_format='array'.
402+ Converts sparse data to a sparse dataframe if array_format='dataframe'.
403+
404+ Parameters
405+ ----------
406+ array_format : str {'array', 'dataframe'}
407+ Desired data type of the output
408+ - If array_format='array'
409+ If data is non-sparse
410+ Converts to numpy-array
411+ Enforces numeric encoding of categorical columns
412+ Missing values are represented as NaN in the numpy-array
413+ else emits a warning and returns data as is
414+ - If array_format='dataframe'
415+ If data is sparse
416+ Works only on sparse data
417+ Converts sparse data to sparse dataframe
418+ else emits a warning and returns data as is
419+
404420 """
405421 if array_format == "array" and not scipy .sparse .issparse (data ):
406422 # We encode the categories such that they are integer to be able
@@ -426,8 +442,11 @@ def _encode_if_category(column):
426442 'PyOpenML cannot handle string when returning numpy'
427443 ' arrays. Use dataset_format="dataframe".'
428444 )
429- if array_format == "dataframe" and scipy .sparse .issparse (data ):
445+ elif array_format == "dataframe" and scipy .sparse .issparse (data ):
430446 return pd .SparseDataFrame (data , columns = attribute_names )
447+ else :
448+ data_type = "sparse-data" if scipy .sparse .issparse (data ) else "non-sparse data"
449+ warn ("Cannot convert {} to '{}'. Returning input data." .format (data_type , array_format ))
431450 return data
432451
433452 @staticmethod
0 commit comments