Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions merlin/datasets/ecommerce/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@
from merlin.datasets.ecommerce.aliccp.dataset import (
default_aliccp_transformation,
get_aliccp,
prepare_alliccp,
prepare_aliccp,
transform_aliccp,
)
from merlin.datasets.ecommerce.booking.dataset import get_booking, transform_booking
from merlin.datasets.ecommerce.dressipi.dataset import get_dressipi2022

__all__ = [
"prepare_alliccp",
"prepare_aliccp",
"transform_aliccp",
"get_aliccp",
"default_aliccp_transformation",
Expand Down
6 changes: 3 additions & 3 deletions merlin/datasets/ecommerce/aliccp/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def get_aliccp(
1. Download the raw data from
[tianchi.aliyun.com](https://tianchi.aliyun.com/dataset/dataDetail?dataId=408#1).
2. Unzip the raw data to a directory.
3. Run `prepare_alliccp(data_dir)` to convert the raw data to parquet files.
3. Run `prepare_aliccp(data_dir)` to convert the raw data to parquet files.

Downloading & preparing the data can take quite a while.
In case you want to use this dataset to run our tutorials, you can also opt for synthetic data.
Expand Down Expand Up @@ -98,7 +98,7 @@ def get_aliccp(
raw_path = p / "raw"
if not raw_path.exists():
raw_path.mkdir(parents=True)
prepare_alliccp(path, output_dir=raw_path, file_size=file_size, **kwargs)
prepare_aliccp(path, output_dir=raw_path, file_size=file_size, **kwargs)

nvt_path = p / transformed_name
train_path, valid_path = nvt_path / "train", nvt_path / "valid"
Expand All @@ -112,7 +112,7 @@ def get_aliccp(
return train, valid


def prepare_alliccp(
def prepare_aliccp(
data_dir: Union[str, Path],
convert_train: bool = True,
convert_test: bool = True,
Expand Down
8 changes: 4 additions & 4 deletions tests/unit/datasets/test_ecommerce.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def test_synthetic_aliccp_raw_data(tmp_path):
MAYBE_ALICCP_DATA is None,
reason="ALI-CCP data is not available, pass it through env variable $DATA_PATH_ALICCP",
)
def test_get_alliccp():
def test_get_aliccp():
data_path = MAYBE_ALICCP_DATA

nvt_workflow = ecommerce.default_aliccp_transformation(add_target_encoding=False)
Expand All @@ -104,10 +104,10 @@ def test_get_alliccp():
MAYBE_ALICCP_DATA is None,
reason="ALI-CCP data is not available, pass it through env variable $DATA_PATH_ALICCP",
)
def test_prepare_alliccp(tmp_path):
def test_prepare_aliccp(tmp_path):
data_path = MAYBE_ALICCP_DATA

ecommerce.prepare_alliccp(data_path, file_size=50, max_num_rows=100, output_dir=tmp_path)
ecommerce.prepare_aliccp(data_path, file_size=50, max_num_rows=100, output_dir=tmp_path)
output_files = list(tmp_path.glob("*/*"))

assert len(output_files) == 2
Expand All @@ -118,7 +118,7 @@ def test_prepare_alliccp(tmp_path):
MAYBE_ALICCP_DATA is None,
reason="ALI-CCP data is not available, pass it through env variable $DATA_PATH_ALICCP",
)
def test_transform_alliccp(tmp_path):
def test_transform_aliccp(tmp_path):
data_path = MAYBE_ALICCP_DATA

ecommerce.transform_aliccp(data_path, tmp_path)
Expand Down