Skip to content

Commit a7147e0

Browse files
committed
Add support for DistributedEmbedding for Ironwood and expose has_sparsecores.
- Ironwood is identified as `TPU7x`. - Also added `TPU v5p`. - Added public class method `DistributedEmbedding.has_sparsecores`. - Removed duplicate implementation of `has_sparsecores` in `test_utils`.
1 parent f9be2ec commit a7147e0

File tree

6 files changed

+37
-25
lines changed

6 files changed

+37
-25
lines changed

keras_rs/src/layers/embedding/base_distributed_embedding.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -552,17 +552,17 @@ def _init_feature_configs_structures(
552552
] = {}
553553

554554
# Lazily initialized.
555-
has_sparsecore = None
555+
has_sparsecores = None
556556

557557
for path, feature_config in paths_and_feature_configs:
558558
if isinstance(feature_config, FeatureConfig):
559559
placement = feature_config.table.placement
560560
# Resolve "auto" to an actual placement.
561561
if placement == "auto":
562-
if has_sparsecore is None:
563-
has_sparsecore = self._has_sparsecore()
562+
if has_sparsecores is None:
563+
has_sparsecores = self.has_sparsecores()
564564
placement = (
565-
"sparsecore" if has_sparsecore else "default_device"
565+
"sparsecore" if has_sparsecores else "default_device"
566566
)
567567
else:
568568
# It's a `tf.tpu.experimental.embedding.FeatureConfig`.
@@ -936,7 +936,23 @@ def _default_device_get_embedding_tables(self) -> dict[str, types.Tensor]:
936936
)
937937
return tables
938938

939-
def _has_sparsecore(self) -> bool:
939+
@classmethod
940+
def has_sparsecores(cls) -> bool:
941+
"""Return whether the current devices are TPUs with SparseCore chips.
942+
943+
This is a class method and can be invoked before instantiating a
944+
`DistributedEmbedding`.
945+
946+
Returns:
947+
True if devices are TPUs with SparseCore chips.
948+
949+
Example:
950+
951+
```python
952+
if keras_rs.layers.DistributedEmbedding.has_sparsecores():
953+
print("We have SparseCores")
954+
```
955+
"""
940956
# Explicitly check for SparseCore availability.
941957
# We need this check here rather than in jax/distributed_embedding.py
942958
# so that we can warn the user about missing dependencies.
@@ -952,7 +968,7 @@ def _has_sparsecore(self) -> bool:
952968

953969
if len(tpu_devices) > 0:
954970
device_kind = tpu_devices[0].device_kind
955-
if device_kind in ["TPU v5", "TPU v6 lite"]:
971+
if device_kind in ["TPU v5", "TPU v5p", "TPU v6 lite", "TPU7x"]:
956972
return True
957973

958974
return False

keras_rs/src/layers/embedding/jax/distributed_embedding.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -348,7 +348,7 @@ def _sparsecore_init(
348348
feature_configs: dict[str, FeatureConfig],
349349
table_stacking: str | Sequence[str] | Sequence[Sequence[str]],
350350
) -> None:
351-
if not self._has_sparsecore():
351+
if not self.has_sparsecores():
352352
raise ValueError(
353353
"Not sparse cores available, cannot use explicit sparsecore"
354354
" placement."

keras_rs/src/layers/embedding/jax/distributed_embedding_test.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,11 @@ def _create_sparsecore_layout(
6161

6262

6363
def _num_sparsecores_per_device() -> int:
64-
if test_utils.has_sparsecores():
64+
try:
6565
return jte_utils.num_sparsecores_per_device()
66-
67-
# Default to one for non-sparsecore tests.
68-
return 1
66+
except ValueError:
67+
# Default to one for non-sparsecore tests.
68+
return 1
6969

7070

7171
@pytest.mark.skipif(

keras_rs/src/layers/embedding/jax/embedding_lookup_test.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from jax_tpu_embedding.sparsecore.lib.nn import table_stacking
1515
from jax_tpu_embedding.sparsecore.utils import utils as jte_utils
1616

17+
from keras_rs.src.layers.embedding.jax import distributed_embedding
1718
from keras_rs.src.layers.embedding.jax import embedding_lookup
1819
from keras_rs.src.layers.embedding.jax import embedding_utils
1920
from keras_rs.src.layers.embedding.jax import test_utils
@@ -133,7 +134,7 @@ def _create_table_and_feature_specs(
133134
stacked=[True, False],
134135
)
135136
def test_forward_pass(self, ragged: bool, stacked: bool):
136-
if not test_utils.has_sparsecores():
137+
if not distributed_embedding.DistributedEmbedding.has_sparsecores():
137138
self.skipTest("Test requires sparsecores.")
138139

139140
devices = jax.devices()
@@ -215,7 +216,7 @@ def test_forward_pass(self, ragged: bool, stacked: bool):
215216
def test_model_sharding(
216217
self, ragged: bool, stacked: bool, num_model_shards: int
217218
):
218-
if not test_utils.has_sparsecores():
219+
if not distributed_embedding.DistributedEmbedding.has_sparsecores():
219220
self.skipTest("Test requires sparsecores.")
220221

221222
if num_model_shards > jax.device_count():
@@ -319,7 +320,7 @@ def test_backward_pass(
319320
stacked: bool,
320321
optimizer: embedding_spec.OptimizerSpec,
321322
):
322-
if not test_utils.has_sparsecores():
323+
if not distributed_embedding.DistributedEmbedding.has_sparsecores():
323324
self.skipTest("Test requires sparsecores.")
324325

325326
devices = jax.devices()
@@ -426,7 +427,7 @@ def test_autograd(
426427
stacked: bool,
427428
optimizer: embedding_spec.OptimizerSpec,
428429
):
429-
if not test_utils.has_sparsecores():
430+
if not distributed_embedding.DistributedEmbedding.has_sparsecores():
430431
self.skipTest("Test requires sparsecores.")
431432

432433
devices = jax.devices()

keras_rs/src/layers/embedding/jax/test_utils.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,6 @@
2020
Shape: TypeAlias = tuple[int, ...]
2121

2222

23-
def has_sparsecores() -> bool:
24-
device_kind = jax.devices()[0].device_kind
25-
if device_kind in ["TPU v5", "TPU v6 lite"]:
26-
return True
27-
return False
28-
29-
3023
def _round_up_to_multiple(value: int, multiple: int) -> int:
3124
return ((value + multiple - 1) // multiple) * multiple
3225

keras_rs/src/layers/embedding/tensorflow/distributed_embedding.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,15 +61,17 @@ def __init__(
6161
feature_configs, table_stacking=table_stacking, **kwargs
6262
)
6363

64-
def _is_tpu_strategy(self, strategy: tf.distribute.Strategy) -> bool:
64+
@classmethod
65+
def _is_tpu_strategy(cls, strategy: tf.distribute.Strategy) -> bool:
6566
return isinstance(
6667
strategy,
6768
(tf.distribute.TPUStrategy, tf.distribute.experimental.TPUStrategy),
6869
)
6970

70-
def _has_sparsecore(self) -> bool:
71+
@classmethod
72+
def has_sparsecores(cls) -> bool:
7173
strategy = tf.distribute.get_strategy()
72-
if self._is_tpu_strategy(strategy):
74+
if cls._is_tpu_strategy(strategy):
7375
tpu_embedding_feature = (
7476
strategy.extended.tpu_hardware_feature.embedding_feature
7577
)

0 commit comments

Comments (0)