-
Notifications
You must be signed in to change notification settings - Fork 15
Open
Labels
Description
Reproducer code:
import numpy as np
import cudf
import nvtabular as nvt
from merlin.schema.tags import Tags
# Reproducer: build a small tagged workflow, run it, and look at the output
# dataset's schema before and after calling ``repartition``.

# In-memory purchase table used as the workflow input.
purchases = cudf.DataFrame(
    data={
        "user_id": [0, 1, 2, 2],
        "price": [125.04, 23.07, 101.2, 2.34],
        "color": ["blue", "blue", "red", "yellow"],
        "model": ["deluxe", "compact", "regular", "regular"],
    }
)

# Assemble the workflow graph one column selection at a time. Each selection
# is piped (``>>``) into a tagging op and then merged into the graph with
# ``+=``, in the same order as the original snippet.
graph = ["price"] >> nvt.ops.AddMetadata(tags=[Tags.TARGET])

price_continuous = ["price"] >> nvt.ops.AddTags(tags=[Tags.CONTINUOUS])
graph += price_continuous

user_id_node = ["user_id"] >> nvt.ops.TagAsUserID()
graph += user_id_node

item_feature_node = ["color", "model"] >> nvt.ops.TagAsItemFeatures()
graph += item_feature_node

item_categorical_node = ["color", "model"] >> nvt.ops.AddTags(
    tags=[Tags.CATEGORICAL]
)
graph += item_categorical_node

# Wrap the frame in a Dataset and run fit + transform in one call.
dataset = nvt.Dataset(purchases)
workflow = nvt.Workflow(graph)
ds_out = workflow.fit_transform(dataset)

# Bare expression: evaluated so the schema can be inspected interactively.
ds_out.schema

# Same inspection after repartition(5), for comparison with the schema above.
ds_out = ds_out.repartition(5)
ds_out.schema
