Fix negative index handling in MultiHeadAttention attention_axes

utsab345 · utsab345 · commit deebbc6a3877 · 2025-10-05T07:49:18.000+05:45
diff --git a/.gitignore b/.gitignore
@@ -6,6 +6,7 @@ __pycache__
 **/.vscode-test/**
 **/.vscode-smoke/**
 **/.venv*/
+venv
 bin/**
 build/**
 obj/**
diff --git a/integration_tests/test_multi_head_attention_negative_axis.py b/integration_tests/test_multi_head_attention_negative_axis.py
@@ -0,0 +1,28 @@
+import numpy as np
+
+import keras
+
+
+def test_attention_axes_negative_indexing_matches_positive():
+    x = np.random.normal(size=(2, 3, 8, 4))
+
+    mha_pos = keras.layers.MultiHeadAttention(
+        num_heads=2, key_dim=4, attention_axes=2
+    )
+    mha_neg = keras.layers.MultiHeadAttention(
+        num_heads=2, key_dim=4, attention_axes=-2
+    )
+
+    _ = mha_pos(x, x)
+    _ = mha_neg(x, x)
+
+    mha_neg.set_weights(mha_pos.get_weights())
+
+    z_pos, a_pos = mha_pos(x, x, return_attention_scores=True)
+    z_neg, a_neg = mha_neg(x, x, return_attention_scores=True)
+
+    assert z_pos.shape == z_neg.shape
+    assert a_pos.shape == a_neg.shape
+
+    np.testing.assert_allclose(z_pos, z_neg, rtol=1e-5, atol=1e-5)
+    np.testing.assert_allclose(a_pos, a_neg, rtol=1e-5, atol=1e-5)
diff --git a/integration_tests/test_save_img.py b/integration_tests/test_save_img.py
@@ -24,4 +24,4 @@ def test_save_jpg(tmp_path, shape, name):
     # Check that the image was saved correctly and converted to RGB if needed.
     loaded_img = load_img(path)
     loaded_array = img_to_array(loaded_img)
-    assert loaded_array.shape == (50, 50, 3)
+    assert loaded_array.shape == (50, 50, 3)
diff --git a/keras/src/layers/attention/multi_head_attention.py b/keras/src/layers/attention/multi_head_attention.py
@@ -378,7 +378,17 @@ def _build_attention(self, rank):
         if self._attention_axes is None:
             self._attention_axes = tuple(range(1, rank - 2))
         else:
-            self._attention_axes = tuple(self._attention_axes)
+            # Normalize negative indices relative to INPUT rank (rank - 1)
+            input_rank = rank - 1
+            normalized_axes = []
+            for ax in self._attention_axes:
+                if ax < 0:
+                    # Normalize relative to input rank
+                    normalized_ax = input_rank + ax
+                else:
+                    normalized_ax = ax
+                normalized_axes.append(normalized_ax)
+            self._attention_axes = tuple(normalized_axes)
         (
             self._dot_product_equation,
             self._combine_equation,
@@ -760,6 +770,12 @@ def _build_attention_equation(rank, attn_axes):
     Returns:
         Einsum equations.
     """
+    # Normalize negative indices to positive indices
+    if isinstance(attn_axes, (list, tuple)):
+        attn_axes = tuple(ax % rank if ax < 0 else ax for ax in attn_axes)
+    else:
+        attn_axes = (attn_axes % rank if attn_axes < 0 else attn_axes,)
+
     target_notation = ""
     for i in range(rank):
         target_notation += _index_to_einsum_variable(i)
diff --git a/keras/src/utils/image_utils.py b/keras/src/utils/image_utils.py
@@ -179,7 +179,7 @@ def save_img(path, x, data_format=None, file_format=None, scale=True, **kwargs):
     if file_format is not None and file_format.lower() == "jpg":
         file_format = "jpeg"
     img = array_to_img(x, data_format=data_format, scale=scale)
-    if img.mode == "RGBA" and  file_format == "jpeg":
+    if img.mode == "RGBA" and file_format == "jpeg":
         warnings.warn(
             "The JPEG format does not support RGBA images, converting to RGB."
         )

Original file line number	Diff line number	Diff line change
`@@ -179,7 +179,7 @@ def save_img(path, x, data_format=None, file_format=None, scale=True, **kwargs):`
`179`	`179`	`if file_format is not None and file_format.lower() == "jpg":`
`180`	`180`	`file_format = "jpeg"`
`181`	`181`	`img = array_to_img(x, data_format=data_format, scale=scale)`
`182`		`- if img.mode == "RGBA" and  file_format == "jpeg":`
	`182`	`+ if img.mode == "RGBA" and file_format == "jpeg":`
`183`	`183`	`warnings.warn(`
`184`	`184`	`"The JPEG format does not support RGBA images, converting to RGB."`
`185`	`185`	`)`