Commit 6a88117

Add test
Signed-off-by: Antoni Viros i Martin <[email protected]>
1 parent b12dc58 commit 6a88117

File tree

1 file changed: +54 −0 lines changed


tests/aiu_addons/test_fp8_addon.py

Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
# Copyright The FMS Model Optimizer Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Test suite for FMS addon introducing FP8 functionalities"""

# Third Party
import pytest
import torch

# Local
import fms_mo.aiu_addons.fp8.fp8_spyre_op  # pylint: disable=unused-import


def test_fp8_registration() -> None:
    """
    Ensure fp8 ops are registered properly.
    """

    assert hasattr(torch.ops, "spyre")
    assert hasattr(torch.ops.spyre, "scaled_bmm")
    assert hasattr(torch.ops.spyre, "scaled_paged_attn_store")
    assert hasattr(torch.ops.spyre, "scaled_paged_attn_compute")


# This test requires a GPU with compute capability 8.9 or higher (Ada, Hopper, or newer)
@pytest.mark.skipif(
    not torch.cuda.is_available()
    or torch.cuda.get_device_capability() < (8, 9),
    reason="FP8 is only available on GPUs with compute capability 8.9 or higher",
)
def test_fp8_op() -> None:
    """Validate the output shape of the FP8 attention math compute op.

    Note: this test only checks that the output has the same shape as the
    query; it does not validate the numerical results of the attention op.
    """
    # Local
    from fms_mo.aiu_addons.fp8.fp8_attn import _math_fp8_compute_op

    query = torch.randn((1, 32, 64, 128), dtype=torch.bfloat16, device="cuda")
    key = torch.randn((1, 32, 64, 128), dtype=torch.bfloat16, device="cuda")
    value = torch.randn((1, 32, 64, 128), dtype=torch.bfloat16, device="cuda")

    out = _math_fp8_compute_op(query, key, value, 32, 32, 0.0, None)
    assert out.size() == query.size()
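
For context, the shape assertion in test_fp8_op mirrors standard scaled dot-product attention, whose output always has the same shape as the query. The sketch below is an illustrative reference only, not part of this commit: it assumes the (batch, heads, seq_len, head_dim) layout implied by the tensors built in the test and uses plain torch.nn.functional.scaled_dot_product_attention instead of the spyre op.

# Illustrative sketch, not from the commit; assumed layout (batch, heads, seq_len, head_dim)
import torch
import torch.nn.functional as F

query = torch.randn((1, 32, 64, 128))  # float32 on CPU; dtype/device do not affect the shape contract
key = torch.randn((1, 32, 64, 128))
value = torch.randn((1, 32, 64, 128))

# Reference attention: the output shape always matches the query shape,
# which is the same property test_fp8_op asserts for _math_fp8_compute_op.
out = F.scaled_dot_product_attention(query, key, value, dropout_p=0.0)
assert out.size() == query.size()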

0 commit comments