Temporarily allow bfloat16 dot algorithms on CPU.

dfm · Google-ML-Automation · commit 1f4d184ac889 · 2024-12-07T11:14:09.000-08:00
Since XLA:CPU doesn't (yet!) support explicit algorithms for controlling the precision of dot products, we have a check in JAX that fails when a non-trivial algorithm is specified on CPU. In order to support downstream use cases, this change allows some bfloat16 algorithms to pass through. XLA:CPU "emulates" these algorithms using `F32_F32_F32` with the appropriate casting, which means that CPU numerics will differ from those on other platforms with explicit algorithm support. Even so, it is useful to be able to use these algorithms with the correct input and output casting without requiring platform-dependent logic in user code.

PiperOrigin-RevId: 703834889
diff --git a/jax/_src/lax/lax.py b/jax/_src/lax/lax.py
@@ -3773,6 +3773,8 @@ def _is_fp8_mixed_precision_matmul(_lhs_dtypes, _rhs_dtypes):
     if platform == "cpu" and precision not in {
         DotAlgorithmPreset.DEFAULT, DotAlgorithmPreset.F16_F16_F16,
         DotAlgorithmPreset.F32_F32_F32, DotAlgorithmPreset.F64_F64_F64,
+        DotAlgorithmPreset.BF16_BF16_F32, DotAlgorithmPreset.BF16_BF16_F32_X3,
+        DotAlgorithmPreset.BF16_BF16_F32_X6,
     }:
       raise ValueError(
           f"The precision '{precision}' is not supported by dot_general on CPU")
diff --git a/tests/lax_test.py b/tests/lax_test.py
@@ -1082,6 +1082,9 @@ def testDotAlgorithm(self, algorithm, dtype):
           lax.DotAlgorithmPreset.F16_F16_F16,
           lax.DotAlgorithmPreset.F32_F32_F32,
           lax.DotAlgorithmPreset.F64_F64_F64,
+          lax.DotAlgorithmPreset.BF16_BF16_F32,
+          lax.DotAlgorithmPreset.BF16_BF16_F32_X3,
+          lax.DotAlgorithmPreset.BF16_BF16_F32_X6,
       }:
         raise SkipTest(
             f"The dot algorithm '{algorithm}' is not supported on CPU.")