Skip to content

Commit 55dc058

Browse files
author
H. Peter Anvin
committed
Document CPU LATEVEX, add CPU EVEX and CPU VEX flags
Document CPU LATEVEX and the associated prefixes; add CPU EVEX and CPU VEX flags to further control encodings. Fix the error message for invalid encodings due to flags. Signed-off-by: H. Peter Anvin <[email protected]>
1 parent 494d953 commit 55dc058

File tree

5 files changed

+145
-39
lines changed

5 files changed

+145
-39
lines changed

asm/assemble.c

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -934,8 +934,12 @@ int64_t assemble(int32_t segment, int64_t start, int bits, insn *instruction)
934934
nasm_nonfatal("instruction not supported in %d-bit mode", bits);
935935
break;
936936
case MERR_ENCMISMATCH:
937-
nasm_nonfatal("instruction not encodable with %s prefix",
938-
prefix_name(instruction->prefixes[PPS_REX]));
937+
if (!instruction->prefixes[PPS_REX]) {
938+
nasm_nonfatal("instruction not encodable without explicit prefix");
939+
} else {
940+
nasm_nonfatal("instruction not encodable with %s prefix",
941+
prefix_name(instruction->prefixes[PPS_REX]));
942+
}
939943
break;
940944
case MERR_BADBND:
941945
case MERR_BADREPNE:
@@ -2552,9 +2556,16 @@ static enum match_result matches(const struct itemplate *itemp,
25522556
return MERR_ENCMISMATCH;
25532557
break;
25542558
default:
2555-
if (itemp_has(itemp, IF_LATEVEX)) {
2556-
if (!iflag_test(&cpu, IF_LATEVEX))
2559+
if (itemp_has(itemp, IF_EVEX)) {
2560+
if (!iflag_test(&cpu, IF_EVEX))
2561+
return MERR_ENCMISMATCH;
2562+
} else if (itemp_has(itemp, IF_VEX)) {
2563+
if (!iflag_test(&cpu, IF_VEX)) {
25572564
return MERR_ENCMISMATCH;
2565+
} else if (itemp_has(itemp, IF_LATEVEX)) {
2566+
if (!iflag_test(&cpu, IF_LATEVEX) && iflag_test(&cpu, IF_EVEX))
2567+
return MERR_ENCMISMATCH;
2568+
}
25582569
}
25592570
break;
25602571
}

asm/directiv.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,9 @@ void set_cpu(const char *value)
111111
{ "any", IF_ANY },
112112
{ "all", IF_ANY },
113113
{ "latevex", IF_LATEVEX },
114-
{ NULL, IF_DEFAULT } /* End of list */
114+
{ "evex", IF_EVEX },
115+
{ "vex", IF_VEX },
116+
{ NULL, 0 }
115117
};
116118

117119
if (!value) {

doc/changes.src

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,20 @@ reservations (e.g. \c{dw ?}.)
6868
\b Allow forcing an instruction in 64-bit mode to have a (possibly
6969
redundant) REX prefix, using the syntax \i\c{\{rex\}} as a prefix.
7070

71+
\b Add a \c{\{vex\}} prefix to enforce VEX (AVX) encoding of an
72+
instruction, using either the 2- or 3-byte VEX prefix.
73+
74+
\b The \c{CPU} directive has been augmented to allow control of
75+
generation of VEX (AVX) versus EVEX (AVX-512) instruction formats, see
76+
\k{CPU}.
77+
78+
\b Some recent instructions that were previously available only
79+
using EVEX encodings are now also encodable using VEX (AVX)
80+
encodings. For backwards compatibility these encodings are not enabled
81+
by default, but can be generated either via an explicit \c{\{vex\}}
82+
prefix or by specifying either \c{CPU LATEVEX} or \c{CPU NOEVEX}; see
83+
\k{CPU}.
84+
7185
\b Document the already existing \c{%unimacro} directive. See \k{unmacro}.
7286

7387
\b Fix a code range generation bug in the DWARF debug format
@@ -767,9 +781,10 @@ options to indicate whether all relevant branches should be getting
767781
\c{BND} prefixes. This is expected to be the norm for use in MPX
768782
code.
769783

770-
\b Add \c{{evex}}, \c{{vex3}} and \c{{vex2}} instruction prefixes to
771-
have NASM encode the corresponding instruction, if possible, with an EVEX,
772-
3-byte VEX, or 2-byte VEX prefix, respectively.
784+
\b Add \c{\{evex\}}, \c{\{vex3\}} and \c{\{vex2\}} instruction
785+
prefixes to have NASM encode the corresponding instruction, if
786+
possible, with an EVEX, 3-byte VEX, or 2-byte VEX prefix,
787+
respectively.
773788

774789
\b Support for section names longer than 8 bytes in Win32/Win64 COFF.
775790

doc/nasmdoc.src

Lines changed: 69 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -5594,47 +5594,87 @@ are excluded from the symbol mangling and also not marked as global.
55945594
\H{CPU} \i\c{CPU}: Defining CPU Dependencies
55955595

55965596
The \i\c{CPU} directive restricts assembly to those instructions which
5597-
are available on the specified CPU.
5597+
are available on the specified CPU. At the moment, it is primarily
5598+
used to reject unavailable \e{encodings} of instructions, such as
5599+
5-byte jumps on the 8080.
55985600

5599-
Options are:
5601+
(If someone would volunteer to work through the database and add
5602+
proper annotations to each instruction, this could be greatly
5603+
improved. Please contact the developers to volunteer, see \k{contact}.)
56005604

5601-
\b\c{CPU 8086} Assemble only 8086 instruction set
5605+
Current CPU keywords are:
56025606

5603-
\b\c{CPU 186} Assemble instructions up to the 80186 instruction set
5607+
\b\c{CPU 8086} - Assemble only 8086 instruction set
56045608

5605-
\b\c{CPU 286} Assemble instructions up to the 286 instruction set
5609+
\b\c{CPU 186} - Assemble instructions up to the 80186 instruction set
56065610

5607-
\b\c{CPU 386} Assemble instructions up to the 386 instruction set
5611+
\b\c{CPU 286} - Assemble instructions up to the 286 instruction set
56085612

5609-
\b\c{CPU 486} 486 instruction set
5613+
\b\c{CPU 386} - Assemble instructions up to the 386 instruction set
56105614

5611-
\b\c{CPU 586} Pentium instruction set
5615+
\b\c{CPU 486} - 486 instruction set
56125616

5613-
\b\c{CPU PENTIUM} Same as 586
5617+
\b\c{CPU 586} - Pentium instruction set
56145618

5615-
\b\c{CPU 686} P6 instruction set
5619+
\b\c{CPU PENTIUM} - Same as 586
56165620

5617-
\b\c{CPU PPRO} Same as 686
5621+
\b\c{CPU 686} - P6 instruction set
56185622

5619-
\b\c{CPU P2} Same as 686
5623+
\b\c{CPU PPRO} - Same as 686
56205624

5621-
\b\c{CPU P3} Pentium III (Katmai) instruction sets
5625+
\b\c{CPU P2} - Same as 686
56225626

5623-
\b\c{CPU KATMAI} Same as P3
5627+
\b\c{CPU P3} - Pentium III (Katmai) instruction sets
56245628

5625-
\b\c{CPU P4} Pentium 4 (Willamette) instruction set
5629+
\b\c{CPU KATMAI} - Same as P3
56265630

5627-
\b\c{CPU WILLAMETTE} Same as P4
5631+
\b\c{CPU P4} - Pentium 4 (Willamette) instruction set
56285632

5629-
\b\c{CPU PRESCOTT} Prescott instruction set
5633+
\b\c{CPU WILLAMETTE} - Same as P4
56305634

5631-
\b\c{CPU X64} x86-64 (x64/AMD64/Intel 64) instruction set
5635+
\b\c{CPU PRESCOTT} - Prescott instruction set
56325636

5633-
\b\c{CPU IA64} IA64 CPU (in x86 mode) instruction set
5637+
\b\c{CPU X64} - x86-64 (x64/AMD64/Intel 64) instruction set
56345638

5635-
All options are case insensitive. All instructions will be selected
5636-
only if they apply to the selected CPU or lower. By default, all
5637-
instructions are available.
5639+
\b\c{CPU IA64} - IA64 CPU (in x86 mode) instruction set
5640+
5641+
\b\c{CPU DEFAULT} - All available instructions
5642+
5643+
\b\c{CPU ALL} - All available instructions \e{and flags}
5644+
5645+
All options are case insensitive.
5646+
5647+
In addition, optional flags can be specified to modify the instruction
5648+
selections. These can be combined with a CPU declaration or specified
5649+
alone. They can be prefixed by \c{+} (add flag, default), \c{-}
5650+
(remove flag) or \c{*} (set flag to default); these prefixes are
5651+
"sticky", so:
5652+
5653+
\c cpu -foo,bar
5654+
5655+
means remove both the \c{foo} and \c{bar} options.
5656+
5657+
If prefixed with \c{no}, it inverts the meaning of the flag, but this
5658+
is not sticky, so:
5659+
5660+
\c cpu nofoo,bar
5661+
5662+
means remove the \c{foo} flag but add the \c{bar} flag.
5663+
5664+
Currently available flags are:
5665+
5666+
\b\c{EVEX} - Enable generation of EVEX (AVX-512) encoded instructions
5667+
without an explicit \c{\{evex\}} prefix. Default on.
5668+
5669+
\b\c{VEX} - Enable generation of VEX (AVX) or XOP encoded
5670+
instructions without an explicit \c{\{vex\}} prefix. Default on.
5671+
5672+
\b\c{LATEVEX} - Enable generation of VEX (AVX) encoding of
5673+
instructions where the VEX instruction forms were introduced
5674+
\e{after} the corresponding EVEX (AVX-512) instruction forms without
5675+
requiring an explicit \c{\{vex\}} prefix. This is implicit if the
5676+
\c{EVEX} flag is disabled and the \c{VEX} flag is enabled. Default
5677+
off.
56385678

56395679

56405680
\H{FLOAT} \i\c{FLOAT}: Handling of \I{floating-point, constants}floating-point constants
@@ -5643,19 +5683,19 @@ By default, floating-point constants are rounded to nearest, and IEEE
56435683
denormals are supported. The following options can be set to alter
56445684
this behaviour:
56455685

5646-
\b\c{FLOAT DAZ} Flush denormals to zero
5686+
\b\c{FLOAT DAZ} - Flush denormals to zero
56475687

5648-
\b\c{FLOAT NODAZ} Do not flush denormals to zero (default)
5688+
\b\c{FLOAT NODAZ} - Do not flush denormals to zero (default)
56495689

5650-
\b\c{FLOAT NEAR} Round to nearest (default)
5690+
\b\c{FLOAT NEAR} - Round to nearest (default)
56515691

5652-
\b\c{FLOAT UP} Round up (toward +Infinity)
5692+
\b\c{FLOAT UP} - Round up (toward +Infinity)
56535693

5654-
\b\c{FLOAT DOWN} Round down (toward -Infinity)
5694+
\b\c{FLOAT DOWN} - Round down (toward -Infinity)
56555695

5656-
\b\c{FLOAT ZERO} Round toward zero
5696+
\b\c{FLOAT ZERO} - Round toward zero
56575697

5658-
\b\c{FLOAT DEFAULT} Restore default settings
5698+
\b\c{FLOAT DEFAULT} - Restore default settings
56595699

56605700
The standard macros \i\c{__?FLOAT_DAZ?__}, \i\c{__?FLOAT_ROUND?__}, and
56615701
\i\c{__?FLOAT?__} contain the current state, as long as the programmer

test/latevex.asm

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
bits 64
22

33
%define YMMWORD yword
4-
4+
55
vpmadd52luq ymm3,ymm1,YMMWORD[rsi]
66
vpmadd52luq ymm16,ymm1,YMMWORD[32+rsi]
77
vpmadd52luq ymm17,ymm1,YMMWORD[64+rsi]
@@ -30,4 +30,42 @@
3030
vpmadd52luq ymm17,ymm2,YMMWORD[64+rcx]
3131
vpmadd52luq ymm18,ymm2,YMMWORD[96+rcx]
3232
vpmadd52luq ymm19,ymm2,YMMWORD[128+rcx]
33-
33+
34+
cpu default
35+
36+
vpmadd52luq ymm3,ymm1,YMMWORD[rsi]
37+
vpmadd52luq ymm3,ymm2,YMMWORD[rcx]
38+
39+
cpu noevex
40+
41+
vpmadd52luq ymm3,ymm1,YMMWORD[rsi]
42+
vpmadd52luq ymm3,ymm2,YMMWORD[rcx]
43+
44+
%ifdef ERROR
45+
vpmadd52luq ymm19,ymm2,YMMWORD[128+rcx]
46+
%endif
47+
48+
cpu evex,novex,latevex
49+
50+
vpmadd52luq ymm3,ymm1,YMMWORD[rsi]
51+
vpmadd52luq ymm3,ymm2,YMMWORD[rcx]
52+
53+
cpu default
54+
55+
vaddps ymm3,ymm1,YMMWORD[rsi]
56+
vaddps ymm3,ymm2,YMMWORD[rcx]
57+
58+
cpu novex
59+
60+
vaddps ymm3,ymm1,YMMWORD[rsi]
61+
vaddps ymm3,ymm2,YMMWORD[rcx]
62+
63+
%ifdef ERROR
64+
cpu noevex
65+
66+
vaddps ymm3,ymm1,YMMWORD[rsi]
67+
vaddps ymm3,ymm2,YMMWORD[rcx]
68+
%endif
69+
70+
{vex} vaddps ymm3,ymm1,YMMWORD[rsi]
71+
{vex} vaddps ymm3,ymm2,YMMWORD[rcx]

0 commit comments

Comments
 (0)