Skip to content

Commit e3c66e5

Browse files
committed
Adding documentation for array_format
1 parent 461814d commit e3c66e5

File tree

1 file changed

+12
-3
lines changed

1 file changed

+12
-3
lines changed

openml/datasets/dataset.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -368,9 +368,18 @@ def decode_arff(fh):
368368
def _convert_array_format(data, array_format, attribute_names):
369369
"""Convert a dataset to a given array format.
370370
371-
By default, the data are stored as a sparse matrix or a pandas
372-
dataframe. One might be interested to get a pandas SparseDataFrame or a
373-
NumPy array instead, respectively.
371+
Parameters
372+
----------
373+
array_format : str
374+
Tag to attach to the dataset to get a pandas SparseDataFrame or a
375+
NumPy array instead.
376+
- If array_format='array'
377+
Converts non-sparse numeric data to numpy-array
378+
Enforces numeric encoding of categorical columns
379+
Missing values are represented as NaN in the array
380+
- If array_format='dataframe'
381+
Converts sparse data to sparse dataframe
382+
374383
"""
375384
if array_format == "array" and not scipy.sparse.issparse(data):
376385
# We encode the categories such that they are integer to be able

0 commit comments

Comments
 (0)