-
Notifications
You must be signed in to change notification settings - Fork 54
Closed
NVIDIA-Merlin/core
#43

Description
Bug description
I am getting the following error when I want to generate item_embeddings.
schema = TensorflowMetadata.from_proto_text_file('./train/').to_merlin_schema()
schema = schema.select_by_name(['user_id', 'user_gender', 'user_age', 'user_geography', 'user_profile', 'user_group','item_id', 'item_category', 'item_brand', 'item_shop'])
model = mm.TwoTowerModel(schema, query_tower= mm.MLPBlock([32, 64]))
model.compile(optimizer="adam", run_eagerly=False)
train_loader = get_dataloader(nvt.Dataset(train_path), shuffle=True)
losses = model.fit(train_loader, epochs=1)
item_features = cudf.read_parquet('item_featues.parquet')
model.item_embeddings(nvt.Dataset(item_features, schema=schema), batch_size=4096)
WARNING:absl:Found untraced functions such as block_context_layer_call_fn, block_context_layer_call_and_return_conditional_losses, block_context_layer_call_fn, block_context_layer_call_and_return_conditional_losses, block_context_layer_call_and_return_conditional_losses while saving (showing 5 of 55). These functions will not be directly callable after loading.
INFO:tensorflow:Assets written to: /tmp/tmppt5w0fbu/assets
INFO:tensorflow:Assets written to: /tmp/tmppt5w0fbu/assets
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Input In [15], in <cell line: 1>()
----> 1 model.item_embeddings(nvt.Dataset(item_features, schema=schema), batch_size=4096)
File /models/merlin/models/tf/core.py:2659, in RetrievalModel.item_embeddings(self, dataset, item_tag, item_id_tag, batch_size)
2640 """Export item embeddings from the model.
2641
2642 Parameters
(...)
2655 merlin.io.Dataset
2656 """
2657 from merlin.models.tf.utils.batch_utils import ItemEmbeddings
-> 2659 get_item_emb = ItemEmbeddings(self, batch_size=batch_size)
2661 dataset = self._ensure_unique(dataset, item_tag, item_id_tag)
2662 embeddings = dataset.map_partitions(get_item_emb)
File /models/merlin/models/tf/utils/batch_utils.py:114, in ItemEmbeddings.__init__(self, model, batch_size, save_path)
111 item_block = model.block.first.item_block()
112 schema = item_block.schema
--> 114 super().__init__(
115 item_block,
116 save_path=save_path,
117 batch_size=batch_size,
118 schema=schema,
119 output_concat_func=np.concatenate,
120 )
File /models/merlin/models/tf/utils/batch_utils.py:77, in TFModelEncode.__init__(self, model, output_names, batch_size, save_path, block_load_func, schema, output_concat_func)
66 def __init__(
67 self,
68 model: tp.Union[Model, tf.keras.Model],
(...)
74 output_concat_func=None,
75 ):
76 save_path = save_path or tempfile.mkdtemp()
---> 77 model.save(save_path)
79 model_load_func = block_load_func if block_load_func else tf.keras.models.load_model
80 if not output_names:
File /usr/local/lib/python3.8/dist-packages/keras/utils/traceback_utils.py:67, in filter_traceback.<locals>.error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
File /models/merlin/models/tf/core.py:2274, in ModelBlock.get_config(self)
2273 def get_config(self):
-> 2274 return {"block": tf.keras.utils.serialize_keras_object(self.block)}
File /models/merlin/models/tf/core.py:789, in SequentialBlock.get_config(self)
787 config = {}
788 for i, layer in enumerate(self.layers):
--> 789 config[i] = tf.keras.utils.serialize_keras_object(layer)
791 return config
File /models/merlin/models/tf/core.py:1553, in ParallelBlock.get_config(self)
1551 def get_config(self):
1552 return maybe_serialize_keras_objects(
-> 1553 self, super(ParallelBlock, self).get_config(), ["parallel_layers"]
1554 )
File /models/merlin/models/tf/core.py:1174, in TabularBlock.get_config(self)
1171 config = maybe_serialize_keras_objects(self, config, ["pre", "post", "aggregation"])
1173 if self.schema:
-> 1174 config["schema"] = schema_to_tensorflow_metadata_json(self.schema)
1176 return config
File /models/merlin/models/utils/schema.py:38, in schema_to_tensorflow_metadata_json(schema, path)
37 def schema_to_tensorflow_metadata_json(schema, path=None):
---> 38 json = TensorflowMetadata.from_merlin_schema(schema).to_json()
39 if path:
40 with open(path, "w") as o:
File /core/merlin/schema/io/tensorflow_metadata.py:97, in TensorflowMetadata.to_json(self)
96 def to_json(self):
---> 97 return self.proto_schema.to_json()
File /usr/local/lib/python3.8/dist-packages/betterproto/__init__.py:909, in Message.to_json(self, indent)
907 def to_json(self, indent: Union[None, int, str] = None) -> str:
908 """Returns the encoded JSON representation of this message instance."""
--> 909 return json.dumps(self.to_dict(), indent=indent)
File /usr/lib/python3.8/json/__init__.py:231, in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)
226 # cached encoder
227 if (not skipkeys and ensure_ascii and
228 check_circular and allow_nan and
229 cls is None and indent is None and separators is None and
230 default is None and not sort_keys and not kw):
--> 231 return _default_encoder.encode(obj)
232 if cls is None:
233 cls = JSONEncoder
File /usr/lib/python3.8/json/encoder.py:199, in JSONEncoder.encode(self, o)
195 return encode_basestring(o)
196 # This doesn't pass the iterator directly to ''.join() because the
197 # exceptions aren't as detailed. The list call should be roughly
198 # equivalent to the PySequence_Fast that ''.join() would do.
--> 199 chunks = self.iterencode(o, _one_shot=True)
200 if not isinstance(chunks, (list, tuple)):
201 chunks = list(chunks)
File /usr/lib/python3.8/json/encoder.py:257, in JSONEncoder.iterencode(self, o, _one_shot)
252 else:
253 _iterencode = _make_iterencode(
254 markers, self.default, _encoder, self.indent, floatstr,
255 self.key_separator, self.item_separator, self.sort_keys,
256 self.skipkeys, _one_shot)
--> 257 return _iterencode(o, 0)
File /usr/lib/python3.8/json/encoder.py:179, in JSONEncoder.default(self, o)
160 def default(self, o):
161 """Implement this method in a subclass such that it returns
162 a serializable object for ``o``, or calls the base implementation
163 (to raise a ``TypeError``).
(...)
177
178 """
--> 179 raise TypeError(f'Object of type {o.__class__.__name__} '
180 f'is not JSON serializable')
TypeError: Object of type Struct is not JSON serializable
Steps/Code to reproduce bug
Expected behavior
Environment details
- Merlin version: merlin-tensorflow-training:22.03 container
- Platform: Docker image
- Python version:
- PyTorch version (GPU?):
- Tensorflow version (GPU?): Tensorflow
Reactions are currently unavailable
Metadata
Assignees
Labels
bug — Something isn't working