Skip to content

Commit 3905544

Browse files
authored
Merge pull request #708 from openml/fix_639
Adding documentation for _convert_array_format output parameter
2 parents 6ec4ad1 + 299da1d commit 3905544

File tree

1 file changed

+23
-4
lines changed

1 file changed

+23
-4
lines changed

openml/datasets/dataset.py

Lines changed: 23 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -398,9 +398,25 @@ def decode_arff(fh):
398 398
def _convert_array_format(data, array_format, attribute_names):
399 399
"""Convert a dataset to a given array format.
400 400
401-
By default, the data are stored as a sparse matrix or a pandas
402-
dataframe. One might be interested to get a pandas SparseDataFrame or a
403-
NumPy array instead, respectively.
401+
Converts to numpy array if data is non-sparse.
402+
Converts to a sparse dataframe if data is sparse.
403+
404+
Parameters
405+
----------
406+
array_format : str {'array', 'dataframe'}
407+
Desired data type of the output
408+
- If array_format='array'
409+
If data is non-sparse
410+
Converts to numpy-array
411+
Enforces numeric encoding of categorical columns
412+
Missing values are represented as NaN in the numpy-array
413+
else returns data as is
414+
- If array_format='dataframe'
415+
If data is sparse
416+
Works only on sparse data
417+
Converts sparse data to sparse dataframe
418+
else returns data as is
419+
404 420
"""
405 421
if array_format == "array" and not scipy.sparse.issparse(data):
406 422
# We encode the categories such that they are integer to be able
@@ -426,8 +442,11 @@ def _encode_if_category(column):
426 442
'PyOpenML cannot handle string when returning numpy'
427 443
' arrays. Use dataset_format="dataframe".'
428 444
)
429-
if array_format == "dataframe" and scipy.sparse.issparse(data):
445+
elif array_format == "dataframe" and scipy.sparse.issparse(data):
430 446
return pd.SparseDataFrame(data, columns=attribute_names)
447+
else:
448+
data_type = "sparse-data" if scipy.sparse.issparse(data) else "non-sparse data"
449+
warn("Cannot convert {} to '{}'. Returning input data.".format(data_type, array_format))
431 450
return data
432 451

433 452
@staticmethod

0 commit comments

Comments
 (0)