Skip to content

Commit 48455d1

Browse files
remove production name (#755)
1 parent 5653590 commit 48455d1

File tree

7 files changed

+7
-7
lines changed

7 files changed

+7
-7
lines changed

python/perf-kernels/06-fused-attention-transV.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -825,7 +825,7 @@ def test_op_bwd(Z, H, N_CTX, D_HEAD, dtype=torch.float16):
825825
assert torch.allclose(ref_out, tri_out, atol=1e-2, rtol=0)
826826
if torch.version.hip is None:
827827
assert torch.allclose(ref_dv, tri_dv, atol=1e-2, rtol=0)
828-
# The current block size for MI200 series is 64x64. This results in
828+
# The current block size for gfx90a and gfx908 series is 64x64. This results in
829829
# larger differences in float results due to rounding.
830830
else:
831831
assert torch.allclose(ref_dv, tri_dv, atol=5e-2, rtol=0)

python/perf-kernels/flash-attention.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1833,7 +1833,7 @@ def test_op_bwd(Z, H, N_CTX, D_HEAD, qseqlen_not_equal_kseqlen, causal, torch_sd
18331833
#print(tri_dv)
18341834
# compare
18351835
torch.testing.assert_close(ref_out, tri_out, atol=1e-2, rtol=0)
1836-
# The current block size for MI200 series is 64x64. This results in
1836+
# The current block size for gfx90a and gfx908 series is 64x64. This results in
18371837
# larger differences in float results due to rounding.
18381838

18391839
if dtype == torch.bfloat16:

python/perf-kernels/streamk/03-matrix-multiplication-stream-k.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ def _call(a: torch.Tensor, b: torch.Tensor, c: torch.Tensor, bias: torch.Tensor,
7373
# compute grid (work to do per SM on the first wave)
7474
grids = total_programs_streamk
7575
stride_bias = bias.stride(0) if use_bias else 0
76-
# MI300X settings, MI250 set num_xcds = 1
76+
# gfx942 settings, gfx90a set num_xcds = 1
7777
num_xcds = 8
7878
kk = streamk_gemm[(grids, )](
7979
a,

python/perf-kernels/streamk/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ The plan is to use this version as the base version for the future triton stream
6969

7070
- use atomics for spinning lock to replace atomic_add for the final output.
7171

72-
- pid renumbering based on chiplet structure of MI300X
72+
- pid renumbering based on chiplet structure of gfx942
7373

7474
- dynamic grid setting
7575

python/perf-kernels/streamk/tune_streamk.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,7 @@ def matmul(kernel_func, a, b, c, bias, P, locks, num_sms, block_m, block_n, bloc
398398
m_tiles = triton.cdiv(M, block_m)
399399
n_tiles = triton.cdiv(N, block_n)
400400
streamk_tiles = m_tiles * n_tiles % num_sms
401-
# change num_xcds = 1 if using MI250
401+
# change num_xcds = 1 if using gfx90a
402402
num_xcds = 8
403403
kernel_func[
404404
grid,

python/perf-kernels/streamk/utils/gemm_wrapper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def _call(a: torch.Tensor, b: torch.Tensor, c: torch.Tensor, bias: torch.Tensor,
6868
grids = min(total_programs_streamk, total_tiles)
6969
total_programs_streamk = min(total_programs_streamk, total_tiles)
7070
stride_bias = bias.stride(0) if use_bias else 0
71-
# MI300X settings, MI250 set num_xcds = 1
71+
# gfx942 settings, gfx90a set num_xcds = 1
7272
num_xcds = 8
7373

7474
kk = streamk_gemm[(grids, )](

python/perf-kernels/tools/plot-layout/plot_layout.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,7 @@ def checkMfmaValidity(mfmaNonKDim, kWidth, kGroup, dtype_a, dtype_b, trans, scal
302302
dtype_a == dtype_b), f"Cannot do mixed precision mfma with {dtype_a} and {dtype_b}"
303303
'''
304304
Check mfma size according to data types
305-
* refers to newly added instructions on MI350
305+
* refers to newly added instructions on gfx950
306306
Both dtypes are f4 or fp6 or bf6
307307
*mfma_f32_16x16x128_f8f6f4: kWidth = 32, kGroup = 1
308308
*mfma_f32_32x32x64_f8f6f4: kWidth = 32, kGroup = 1

0 commit comments

Comments
 (0)