Skip to content

Commit 70ed10d

Browse files
InstLatx64 authored and
maciejwieczorretman committed
AVX512BF16 fix
AVX512BF16 fix: (1) VCVTNEPS2BF16 operand count; (2) VDPBF16PS operand size. Adds AVX512BF16 test files. Checked with XED version: [v2025.06.08]
1 parent 0fb3391 commit 70ed10d

File tree

3 files changed

+224
-8
lines changed

3 files changed

+224
-8
lines changed

test/avx512bf16-64.asm

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
BITS 64
2+
vcvtne2ps2bf16 xmm1, xmm2, xmm3
3+
vcvtne2ps2bf16 ymm1, ymm2, ymm3
4+
vcvtne2ps2bf16 zmm1, zmm2, zmm3
5+
6+
vcvtneps2bf16 xmm1, xmm2
7+
vcvtneps2bf16 xmm1, ymm2
8+
vcvtneps2bf16 ymm1, zmm2
9+
10+
vdpbf16ps xmm1, xmm2, xmm3
11+
vdpbf16ps ymm1, ymm2, ymm3
12+
vdpbf16ps zmm1, zmm2, zmm3
13+
14+
vcvtne2ps2bf16 xmm1, xmm2, [rax]
15+
vcvtne2ps2bf16 ymm1, ymm2, [rcx+1]
16+
vcvtne2ps2bf16 zmm1, zmm2, [2*rdx+64]
17+
18+
vcvtneps2bf16 xmm1, oword [rax]
19+
vcvtneps2bf16 xmm1, yword [rcx+1]
20+
vcvtneps2bf16 ymm1, [2*rdx+64]
21+
22+
vdpbf16ps xmm1, xmm2, [rax]
23+
vdpbf16ps ymm1, ymm2, [rcx+1]
24+
vdpbf16ps zmm1, zmm2, [2*rdx+64]
25+
26+
vcvtne2ps2bf16 xmm1, xmm2, [rax]{1to4}
27+
vcvtne2ps2bf16 ymm1, ymm2, [rcx+1]{1to8}
28+
vcvtne2ps2bf16 zmm1, zmm2, [2*rdx+4]{1to16}
29+
30+
vcvtneps2bf16 xmm1, [rax]{1to4}
31+
vcvtneps2bf16 xmm1, [rcx+1]{1to8}
32+
vcvtneps2bf16 ymm1, [2*rdx+4]{1to16}
33+
34+
vdpbf16ps xmm1, xmm2, [rax]{1to4}
35+
vdpbf16ps ymm1, ymm2, [rcx+1]{1to8}
36+
vdpbf16ps zmm1, zmm2, [2*rdx+4]{1to16}
37+
38+
vcvtne2ps2bf16 xmm1 {k1}, xmm2, xmm3
39+
vcvtne2ps2bf16 ymm1 {k1}, ymm2, ymm3
40+
vcvtne2ps2bf16 zmm1 {k1}, zmm2, zmm3
41+
42+
vcvtneps2bf16 xmm1 {k1}, xmm2
43+
vcvtneps2bf16 xmm1 {k1}, ymm2
44+
vcvtneps2bf16 ymm1 {k1}, zmm2
45+
46+
vdpbf16ps xmm1 {k1}, xmm2, xmm3
47+
vdpbf16ps ymm1 {k1}, ymm2, ymm3
48+
vdpbf16ps zmm1 {k1}, zmm2, zmm3
49+
50+
vcvtne2ps2bf16 xmm1 {k1}, xmm2, [rax]
51+
vcvtne2ps2bf16 ymm1 {k1}, ymm2, [rcx+1]
52+
vcvtne2ps2bf16 zmm1 {k1}, zmm2, [2*rdx+64]
53+
54+
vcvtneps2bf16 xmm1 {k1}, oword [rax]
55+
vcvtneps2bf16 xmm1 {k1}, yword [rcx+1]
56+
vcvtneps2bf16 ymm1 {k1}, [2*rdx+64]
57+
58+
vdpbf16ps xmm1 {k1}, xmm2, [rax]
59+
vdpbf16ps ymm1 {k1}, ymm2, [rcx+1]
60+
vdpbf16ps zmm1 {k1}, zmm2, [2*rdx+64]
61+
62+
vcvtne2ps2bf16 xmm1 {k1}, xmm2, [rax]{1to4}
63+
vcvtne2ps2bf16 ymm1 {k1}, ymm2, [rcx+1]{1to8}
64+
vcvtne2ps2bf16 zmm1 {k1}, zmm2, [2*rdx+4]{1to16}
65+
66+
vcvtneps2bf16 xmm1 {k1}, [rax]{1to4}
67+
vcvtneps2bf16 xmm1 {k1}, [rcx+1]{1to8}
68+
vcvtneps2bf16 ymm1 {k1}, [2*rdx+4]{1to16}
69+
70+
vdpbf16ps xmm1 {k1}, xmm2, [rax]{1to4}
71+
vdpbf16ps ymm1 {k1}, ymm2, [rcx+1]{1to8}
72+
vdpbf16ps zmm1 {k1}, zmm2, [2*rdx+4]{1to16}
73+
74+
vcvtne2ps2bf16 xmm1 {k1}{z}, xmm2, xmm3
75+
vcvtne2ps2bf16 ymm1 {k1}{z}, ymm2, ymm3
76+
vcvtne2ps2bf16 zmm1 {k1}{z}, zmm2, zmm3
77+
78+
vcvtneps2bf16 xmm1 {k1}{z}, xmm2
79+
vcvtneps2bf16 xmm1 {k1}{z}, ymm2
80+
vcvtneps2bf16 ymm1 {k1}{z}, zmm2
81+
82+
vdpbf16ps xmm1 {k1}{z}, xmm2, xmm3
83+
vdpbf16ps ymm1 {k1}{z}, ymm2, ymm3
84+
vdpbf16ps zmm1 {k1}{z}, zmm2, zmm3
85+
86+
vcvtne2ps2bf16 xmm1 {k1}{z}, xmm2, [rax]
87+
vcvtne2ps2bf16 ymm1 {k1}{z}, ymm2, [rcx+1]
88+
vcvtne2ps2bf16 zmm1 {k1}{z}, zmm2, [2*rdx+64]
89+
90+
vcvtneps2bf16 xmm1 {k1}{z}, oword [rax]         ; oword selects the 128-bit source form (xmm destination is shared with the 256-bit form)
91+
vcvtneps2bf16 xmm1 {k1}{z}, yword [rcx+1]       ; yword selects the 256-bit source form
92+
vcvtneps2bf16 ymm1 {k1}{z}, [2*rdx+64]          ; ymm destination implies the 512-bit source; rdx index matches the sibling groups
93+
94+
vdpbf16ps xmm1 {k1}{z}, xmm2, [rax]
95+
vdpbf16ps ymm1 {k1}{z}, ymm2, [rcx+1]
96+
vdpbf16ps zmm1 {k1}{z}, zmm2, [2*rdx+64]
97+
98+
vcvtne2ps2bf16 xmm1 {k1}{z}, xmm2, [rax]{1to4}
99+
vcvtne2ps2bf16 ymm1 {k1}{z}, ymm2, [rcx+1]{1to8}
100+
vcvtne2ps2bf16 zmm1 {k1}{z}, zmm2, [2*rdx+4]{1to16}
101+
102+
vcvtneps2bf16 xmm1 {k1}{z}, [rax]{1to4}
103+
vcvtneps2bf16 xmm1 {k1}{z}, [rcx+1]{1to8}
104+
vcvtneps2bf16 ymm1 {k1}{z}, [2*rdx+4]{1to16}
105+
106+
vdpbf16ps xmm1 {k1}{z}, xmm2, [rax]{1to4}
107+
vdpbf16ps ymm1 {k1}{z}, ymm2, [rcx+1]{1to8}
108+
vdpbf16ps zmm1 {k1}{z}, zmm2, [2*rdx+4]{1to16}

test/avx512bf16.asm

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
BITS 32
2+
vcvtne2ps2bf16 xmm1, xmm2, xmm3
3+
vcvtne2ps2bf16 ymm1, ymm2, ymm3
4+
vcvtne2ps2bf16 zmm1, zmm2, zmm3
5+
6+
vcvtneps2bf16 xmm1, xmm2
7+
vcvtneps2bf16 xmm1, ymm2
8+
vcvtneps2bf16 ymm1, zmm2
9+
10+
vdpbf16ps xmm1, xmm2, xmm3
11+
vdpbf16ps ymm1, ymm2, ymm3
12+
vdpbf16ps zmm1, zmm2, zmm3
13+
14+
vcvtne2ps2bf16 xmm1, xmm2, [eax]
15+
vcvtne2ps2bf16 ymm1, ymm2, [ecx+1]
16+
vcvtne2ps2bf16 zmm1, zmm2, [2*edx+64]
17+
18+
vcvtneps2bf16 xmm1, oword [eax]
19+
vcvtneps2bf16 xmm1, yword [ecx+1]
20+
vcvtneps2bf16 ymm1, [2*edx+64]
21+
22+
vdpbf16ps xmm1, xmm2, [eax]
23+
vdpbf16ps ymm1, ymm2, [ecx+1]
24+
vdpbf16ps zmm1, zmm2, [2*edx+64]
25+
26+
vcvtne2ps2bf16 xmm1, xmm2, [eax]{1to4}
27+
vcvtne2ps2bf16 ymm1, ymm2, [ecx+1]{1to8}
28+
vcvtne2ps2bf16 zmm1, zmm2, [2*edx+4]{1to16}
29+
30+
vcvtneps2bf16 xmm1, [eax]{1to4}
31+
vcvtneps2bf16 xmm1, [ecx+1]{1to8}
32+
vcvtneps2bf16 ymm1, [2*edx+4]{1to16}
33+
34+
vdpbf16ps xmm1, xmm2, [eax]{1to4}
35+
vdpbf16ps ymm1, ymm2, [ecx+1]{1to8}
36+
vdpbf16ps zmm1, zmm2, [2*edx+4]{1to16}
37+
38+
vcvtne2ps2bf16 xmm1 {k1}, xmm2, xmm3
39+
vcvtne2ps2bf16 ymm1 {k1}, ymm2, ymm3
40+
vcvtne2ps2bf16 zmm1 {k1}, zmm2, zmm3
41+
42+
vcvtneps2bf16 xmm1 {k1}, xmm2
43+
vcvtneps2bf16 xmm1 {k1}, ymm2
44+
vcvtneps2bf16 ymm1 {k1}, zmm2
45+
46+
vdpbf16ps xmm1 {k1}, xmm2, xmm3
47+
vdpbf16ps ymm1 {k1}, ymm2, ymm3
48+
vdpbf16ps zmm1 {k1}, zmm2, zmm3
49+
50+
vcvtne2ps2bf16 xmm1 {k1}, xmm2, [eax]
51+
vcvtne2ps2bf16 ymm1 {k1}, ymm2, [ecx+1]
52+
vcvtne2ps2bf16 zmm1 {k1}, zmm2, [2*edx+64]
53+
54+
vcvtneps2bf16 xmm1 {k1}, oword [eax]
55+
vcvtneps2bf16 xmm1 {k1}, yword [ecx+1]
56+
vcvtneps2bf16 ymm1 {k1}, [2*edx+64]
57+
58+
vdpbf16ps xmm1 {k1}, xmm2, [eax]
59+
vdpbf16ps ymm1 {k1}, ymm2, [ecx+1]
60+
vdpbf16ps zmm1 {k1}, zmm2, [2*edx+64]
61+
62+
vcvtne2ps2bf16 xmm1 {k1}, xmm2, [eax]{1to4}
63+
vcvtne2ps2bf16 ymm1 {k1}, ymm2, [ecx+1]{1to8}
64+
vcvtne2ps2bf16 zmm1 {k1}, zmm2, [2*edx+4]{1to16}
65+
66+
vcvtneps2bf16 xmm1 {k1}, [eax]{1to4}
67+
vcvtneps2bf16 xmm1 {k1}, [ecx+1]{1to8}
68+
vcvtneps2bf16 ymm1 {k1}, [2*edx+4]{1to16}
69+
70+
vdpbf16ps xmm1 {k1}, xmm2, [eax]{1to4}
71+
vdpbf16ps ymm1 {k1}, ymm2, [ecx+1]{1to8}
72+
vdpbf16ps zmm1 {k1}, zmm2, [2*edx+4]{1to16}
73+
74+
vcvtne2ps2bf16 xmm1 {k1}{z}, xmm2, xmm3         ; zero-masking, register source (the merge-masking {k1} forms are covered earlier in this file)
75+
vcvtne2ps2bf16 ymm1 {k1}{z}, ymm2, ymm3
76+
vcvtne2ps2bf16 zmm1 {k1}{z}, zmm2, zmm3
77+
78+
vcvtneps2bf16 xmm1 {k1}{z}, xmm2                ; zero-masking, 128-bit register source
79+
vcvtneps2bf16 xmm1 {k1}{z}, ymm2                ; zero-masking, 256-bit source narrowed into xmm
80+
vcvtneps2bf16 ymm1 {k1}{z}, zmm2                ; zero-masking, 512-bit source narrowed into ymm
81+
82+
vdpbf16ps xmm1 {k1}{z}, xmm2, xmm3
83+
vdpbf16ps ymm1 {k1}{z}, ymm2, ymm3
84+
vdpbf16ps zmm1 {k1}{z}, zmm2, zmm3
85+
86+
vcvtne2ps2bf16 xmm1 {k1}{z}, xmm2, [eax]
87+
vcvtne2ps2bf16 ymm1 {k1}{z}, ymm2, [ecx+1]
88+
vcvtne2ps2bf16 zmm1 {k1}{z}, zmm2, [2*edx+64]
89+
90+
vcvtneps2bf16 xmm1 {k1}{z}, oword [eax]
91+
vcvtneps2bf16 xmm1 {k1}{z}, yword [ecx+1]
92+
vcvtneps2bf16 ymm1 {k1}{z}, [2*edx+64]
93+
94+
vdpbf16ps xmm1 {k1}{z}, xmm2, [eax]
95+
vdpbf16ps ymm1 {k1}{z}, ymm2, [ecx+1]
96+
vdpbf16ps zmm1 {k1}{z}, zmm2, [2*edx+64]
97+
98+
vcvtne2ps2bf16 xmm1 {k1}{z}, xmm2, [eax]{1to4}
99+
vcvtne2ps2bf16 ymm1 {k1}{z}, ymm2, [ecx+1]{1to8}
100+
vcvtne2ps2bf16 zmm1 {k1}{z}, zmm2, [2*edx+4]{1to16}
101+
102+
vcvtneps2bf16 xmm1 {k1}{z}, [eax]{1to4}
103+
vcvtneps2bf16 xmm1 {k1}{z}, [ecx+1]{1to8}
104+
vcvtneps2bf16 ymm1 {k1}{z}, [2*edx+4]{1to16}
105+
106+
vdpbf16ps xmm1 {k1}{z}, xmm2, [eax]{1to4}
107+
vdpbf16ps ymm1 {k1}{z}, ymm2, [ecx+1]{1to8}
108+
vdpbf16ps zmm1 {k1}{z}, zmm2, [2*edx+4]{1to16}

x86/insns.dat

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5396,15 +5396,15 @@ XRESLDTRK void [ f2 0f 01 e9] TSXLDTRK
53965396
XSUSLDTRK void [ f2 0f 01 e8] TSXLDTRK
53975397

53985398
;# AVX512 Bfloat16 instructions
5399-
VCVTNE2PS2BF16 xmmreg|mask|z,xmmreg*,xmmrm128|b32 [rvm:fv: evex.128.f2.0f38.w0 72 /r] AVX512BF16
5400-
VCVTNE2PS2BF16 ymmreg|mask|z,ymmreg*,ymmrm256|b32 [rvm:fv: evex.256.f2.0f38.w0 72 /r] AVX512BF16
5399+
VCVTNE2PS2BF16 xmmreg|mask|z,xmmreg*,xmmrm128|b32 [rvm:fv: evex.128.f2.0f38.w0 72 /r] AVX512BF16,AVX512VL
5400+
VCVTNE2PS2BF16 ymmreg|mask|z,ymmreg*,ymmrm256|b32 [rvm:fv: evex.256.f2.0f38.w0 72 /r] AVX512BF16,AVX512VL
54015401
VCVTNE2PS2BF16 zmmreg|mask|z,zmmreg*,zmmrm512|b32 [rvm:fv: evex.512.f2.0f38.w0 72 /r] AVX512BF16
5402-
VCVTNEPS2BF16 xmmreg|mask|z,xmmreg*,xmmrm128|b32 [rvm:fv: evex.128.f3.0f38.w0 72 /r] AVX512BF16
5403-
VCVTNEPS2BF16 ymmreg|mask|z,ymmreg*,ymmrm256|b32 [rvm:fv: evex.256.f3.0f38.w0 72 /r] AVX512BF16
5404-
VCVTNEPS2BF16 zmmreg|mask|z,zmmreg*,zmmrm512|b32 [rvm:fv: evex.512.f3.0f38.w0 72 /r] AVX512BF16
5405-
VDPBF16PS xmmreg|mask|z,xmmreg*,xmmrm128|b32 [rvm:fv: evex.128.f3.0f38.w0 52 /r] AVX512BF16
5406-
VDPBF16PS ymmreg|mask|z,ymmreg*,ymmrm128|b32 [rvm:fv: evex.256.f3.0f38.w0 52 /r] AVX512BF16
5407-
VDPBF16PS zmmreg|mask|z,zmmreg*,zmmrm128|b32 [rvm:fv: evex.512.f3.0f38.w0 52 /r] AVX512BF16
5402+
VCVTNEPS2BF16 xmmreg|mask|z,xmmrm128|b32 [rm:fv: evex.128.f3.0f38.w0 72 /r] AVX512BF16,AVX512VL
5403+
VCVTNEPS2BF16 xmmreg|mask|z,ymmrm256|b32 [rm:fv: evex.256.f3.0f38.w0 72 /r] AVX512BF16,AVX512VL
5404+
VCVTNEPS2BF16 ymmreg|mask|z,zmmrm512|b32 [rm:fv: evex.512.f3.0f38.w0 72 /r] AVX512BF16
5405+
VDPBF16PS xmmreg|mask|z,xmmreg*,xmmrm128|b32 [rvm:fv: evex.128.f3.0f38.w0 52 /r] AVX512BF16,AVX512VL
5406+
VDPBF16PS ymmreg|mask|z,ymmreg*,ymmrm256|b32 [rvm:fv: evex.256.f3.0f38.w0 52 /r] AVX512BF16,AVX512VL
5407+
VDPBF16PS zmmreg|mask|z,zmmreg*,zmmrm512|b32 [rvm:fv: evex.512.f3.0f38.w0 52 /r] AVX512BF16
54085408

54095409
;# AVX512 mask intersect instructions
54105410
VP2INTERSECTD kreg|rs2,xmmreg,xmmrm128|b32 [rvm:fv: evex.nds.128.f2.0f38.w0 68 /r] AVX512VL,AVX512VP2INTERSECT

0 commit comments

Comments
 (0)