diff --git a/lld/test/COFF/lto-cpu-string.ll b/lld/test/COFF/lto-cpu-string.ll index c2a8df5f75bad..0f233b32bd43a 100644 --- a/lld/test/COFF/lto-cpu-string.ll +++ b/lld/test/COFF/lto-cpu-string.ll @@ -14,7 +14,7 @@ target triple = "x86_64-pc-windows-msvc19.14.26433" define dllexport void @foo() #0 { entry: - call void asm sideeffect ".p2align 4, 0x90", "~{dirflag},~{fpsr},~{flags}"() + call void asm sideeffect ".p2align 4", "~{dirflag},~{fpsr},~{flags}"() ret void } diff --git a/lld/test/ELF/lto/cpu-string.ll b/lld/test/ELF/lto/cpu-string.ll index 3697cbd6d9472..5444339007b0d 100644 --- a/lld/test/ELF/lto/cpu-string.ll +++ b/lld/test/ELF/lto/cpu-string.ll @@ -18,7 +18,7 @@ target triple = "x86_64-unknown-linux-gnu" define void @foo() #0 { entry: - call void asm sideeffect ".p2align 4, 0x90", "~{dirflag},~{fpsr},~{flags}"() + call void asm sideeffect ".p2align 4", "~{dirflag},~{fpsr},~{flags}"() ret void } diff --git a/lld/test/ELF/lto/mllvm.ll b/lld/test/ELF/lto/mllvm.ll index 883a9c8d8dc75..d09dcd0bf7b95 100644 --- a/lld/test/ELF/lto/mllvm.ll +++ b/lld/test/ELF/lto/mllvm.ll @@ -17,7 +17,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 define void @_start() #0 { entry: - call void asm sideeffect ".p2align 4, 0x90", "~{dirflag},~{fpsr},~{flags}"() + call void asm sideeffect ".p2align 4", "~{dirflag},~{fpsr},~{flags}"() ret void } diff --git a/lld/test/MachO/lto-cpu-string.ll b/lld/test/MachO/lto-cpu-string.ll index dcd51bef6f446..3736a2e1f0fa8 100644 --- a/lld/test/MachO/lto-cpu-string.ll +++ b/lld/test/MachO/lto-cpu-string.ll @@ -16,7 +16,7 @@ target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 define void @foo() #0 { entry: - call void asm sideeffect ".p2align 4, 0x90", "~{dirflag},~{fpsr},~{flags}"() + call void asm sideeffect ".p2align 4", "~{dirflag},~{fpsr},~{flags}"() ret void } diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 46fa2a74450e1..c6a4a74a220af 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -65,12 +65,20 @@ Changes to Interprocedural Optimizations Changes to the AArch64 Backend ------------------------------ +* `.balign N, 0`, `.p2align N, 0`, `.align N, 0` in code sections will now fill + the required alignment space with a sequence of `0x0` bytes (the requested + fill value) rather than NOPs. + Changes to the AMDGPU Backend ----------------------------- Changes to the ARM Backend -------------------------- +* `.balign N, 0`, `.p2align N, 0`, `.align N, 0` in code sections will now fill + the required alignment space with a sequence of `0x0` bytes (the requested + fill value) rather than NOPs. + Changes to the AVR Backend -------------------------- @@ -92,6 +100,10 @@ Changes to the PowerPC Backend Changes to the RISC-V Backend ----------------------------- +* `.balign N, 0`, `.p2align N, 0`, `.align N, 0` in code sections will now fill + the required alignment space with a sequence of `0x0` bytes (the requested + fill value) rather than NOPs. + Changes to the WebAssembly Backend ---------------------------------- @@ -101,6 +113,13 @@ Changes to the Windows Target Changes to the X86 Backend -------------------------- +* `.balign N, 0x90`, `.p2align N, 0x90`, and `.align N, 0x90` in code sections + now fill the required alignment space with repeating `0x90` bytes, rather than + using optimised NOP filling. Optimised NOP filling fills the space with NOP + instructions of various widths, not just those that use the `0x90` byte + encoding. To use optimised NOP filling in a code section, leave off the + "fillval" argument, i.e. `.balign N`, `.p2align N` or `.align N` respectively. + Changes to the OCaml bindings ----------------------------- diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index 992b69f1c5f32..1b9c624136e8b 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -3462,17 +3462,6 @@ bool AsmParser::parseDirectiveAlign(bool IsPow2, unsigned ValueSize) { } } - if (HasFillExpr && FillExpr != 0) { - MCSection *Sec = getStreamer().getCurrentSectionOnly(); - if (Sec && Sec->isVirtualSection()) { - ReturnVal |= - Warning(FillExprLoc, "ignoring non-zero fill value in " + - Sec->getVirtualSectionKind() + " section '" + - Sec->getName() + "'"); - FillExpr = 0; - } - } - // Diagnose non-sensical max bytes to align. if (MaxBytesLoc.isValid()) { if (MaxBytesToFill < 1) { @@ -3489,13 +3478,20 @@ bool AsmParser::parseDirectiveAlign(bool IsPow2, unsigned ValueSize) { } } - // Check whether we should use optimal code alignment for this .align - // directive. const MCSection *Section = getStreamer().getCurrentSectionOnly(); assert(Section && "must have section to emit alignment"); - bool useCodeAlign = Section->useCodeAlign(); - if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) && - ValueSize == 1 && useCodeAlign) { + + if (HasFillExpr && FillExpr != 0 && Section->isVirtualSection()) { + ReturnVal |= + Warning(FillExprLoc, "ignoring non-zero fill value in " + + Section->getVirtualSectionKind() + + " section '" + Section->getName() + "'"); + FillExpr = 0; + } + + // Check whether we should use optimal code alignment for this .align + // directive. + if (Section->useCodeAlign() && !HasFillExpr) { getStreamer().emitCodeAlignment( Align(Alignment), &getTargetParser().getSTI(), MaxBytesToFill); } else { diff --git a/llvm/test/MC/AArch64/align-fill-byte-zero.s b/llvm/test/MC/AArch64/align-fill-byte-zero.s new file mode 100644 index 0000000000000..eed8750845568 --- /dev/null +++ b/llvm/test/MC/AArch64/align-fill-byte-zero.s @@ -0,0 +1,22 @@ +// RUN: llvm-mc -triple aarch64 %s -o - | FileCheck %s --check-prefix=ASM +// RUN: llvm-mc -triple aarch64 -filetype obj %s -o - | \ +// RUN: llvm-objdump -dz - | FileCheck %s --check-prefix=OBJ + +// llvm.org/pr30955 - LLVM was handling `.balign , 0` strangely on +// non-x86 targets. + + .text + +// ASM: add x14, x14, #1 +// OBJ: 910005ce add x14, x14, #0x1 + add x14, x14, 0x1 + +// ASM: .p2align 4, 0x0 +// OBJ-NEXT: 00000000 udf #0x0 +// OBJ-NEXT: 00000000 udf #0x0 +// OBJ-NEXT: 00000000 udf #0x0 + .balign 0x10, 0 + +// ASM: add x14, x14, #1 +// OBJ-NEXT: 910005ce add x14, x14, #0x1 + add x14, x14, 0x1 diff --git a/llvm/test/MC/ARM/align-fill-byte-zero.s b/llvm/test/MC/ARM/align-fill-byte-zero.s new file mode 100644 index 0000000000000..cc1e27e80e920 --- /dev/null +++ b/llvm/test/MC/ARM/align-fill-byte-zero.s @@ -0,0 +1,40 @@ +// RUN: llvm-mc -triple armv7a %s -o - | FileCheck %s --check-prefix=ASM-ARM +// RUN: llvm-mc -triple armv7a -filetype obj %s -o - | \ +// RUN: llvm-objdump --triple=armv7a -dz - | FileCheck %s --check-prefix=OBJ-ARM + +// RUN: llvm-mc -triple thumbv7a %s -o - | FileCheck %s --check-prefix=ASM-THUMB +// RUN: llvm-mc -triple thumbv7a -filetype obj %s -o - | \ +// RUN: llvm-objdump --triple=thumbv7a -dz - | FileCheck %s --check-prefix=OBJ-THUMB + +// llvm.org/pr30955 - LLVM was handling `.balign , 0` strangely on +// non-x86 targets. + + .text + +// ASM-ARM: add r0, r0, #1 +// OBJ-ARM: e2800001 add r0, r0, #1 + +// ASM-THUMB: add.w r0, r0, #1 +// OBJ-THUMB: f100 0001 add.w r0, r0, #0x1 + add r0, r0, 0x1 + +// ASM-ARM: .p2align 4, 0x0 +// OBJ-ARM-NEXT: 00000000 andeq r0, r0, r0 +// OBJ-ARM-NEXT: 00000000 andeq r0, r0, r0 +// OBJ-ARM-NEXT: 00000000 andeq r0, r0, r0 + +// ASM-THUMB: .p2align 4, 0x0 +// OBJ-THUMB-NEXT: 0000 movs r0, r0 +// OBJ-THUMB-NEXT: 0000 movs r0, r0 +// OBJ-THUMB-NEXT: 0000 movs r0, r0 +// OBJ-THUMB-NEXT: 0000 movs r0, r0 +// OBJ-THUMB-NEXT: 0000 movs r0, r0 +// OBJ-THUMB-NEXT: 0000 movs r0, r0 + .balign 0x10, 0 + +// ASM-ARM: add r0, r0, #1 +// OBJ-ARM-NEXT: e2800001 add r0, r0, #1 + +// ASM-THUMB: add.w r0, r0, #1 +// OBJ-THUMB-NEXT: f100 0001 add.w r0, r0, #0x1 + add r0, r0, 0x1 diff --git a/llvm/test/MC/AsmParser/directive_align.s b/llvm/test/MC/AsmParser/directive_align.s index 0430f51740861..d71f559fd03da 100644 --- a/llvm/test/MC/AsmParser/directive_align.s +++ b/llvm/test/MC/AsmParser/directive_align.s @@ -1,6 +1,8 @@ # RUN: not llvm-mc -triple i386-apple-darwin9 %s 2> %t.err | FileCheck %s # RUN: FileCheck < %t.err %s --check-prefix=CHECK-WARN + .data + # CHECK: TEST0: # CHECK: .p2align 1 TEST0: diff --git a/llvm/test/MC/COFF/align-nops.s b/llvm/test/MC/COFF/align-nops.s index d6906d07148b6..7264d70938cea 100644 --- a/llvm/test/MC/COFF/align-nops.s +++ b/llvm/test/MC/COFF/align-nops.s @@ -4,7 +4,7 @@ .text f0: .long 0 - .align 8, 0x90 + .align 8 .long 0 .align 8 diff --git a/llvm/test/MC/ELF/align-nops.s b/llvm/test/MC/ELF/align-nops.s index 41a007111f406..5ce03d5a08aff 100644 --- a/llvm/test/MC/ELF/align-nops.s +++ b/llvm/test/MC/ELF/align-nops.s @@ -4,14 +4,14 @@ .text f0: .long 0 - .align 8, 0x00000090 + .align 8 .long 0 .align 8 // But not in another section .data .long 0 - .align 8, 0x00000090 + .align 8, 0x90 .long 0 .align 8 diff --git a/llvm/test/MC/MachO/x86_32-optimal_nop.s b/llvm/test/MC/MachO/x86_32-optimal_nop.s index bb784b9da9954..19655b5405152 100644 --- a/llvm/test/MC/MachO/x86_32-optimal_nop.s +++ b/llvm/test/MC/MachO/x86_32-optimal_nop.s @@ -5,7 +5,7 @@ ret # nop # 0x90 - .align 1, 0x90 + .align 1 ret # 2 byte nop test .align 4, 0 # start with 16 byte alignment filled with zeros @@ -13,14 +13,14 @@ ret # xchg %ax,%ax # 0x66, 0x90 - .align 2, 0x90 + .align 2 ret # 3 byte nop test .align 4, 0 # start with 16 byte alignment filled with zeros ret # nopl (%[re]ax) # 0x0f, 0x1f, 0x00 - .align 2, 0x90 + .align 2 ret # 4 byte nop test .align 4, 0 # start with 16 byte alignment filled with zeros @@ -30,7 +30,7 @@ ret # nopl 0(%[re]ax) # 0x0f, 0x1f, 0x40, 0x00 - .align 3, 0x90 + .align 3 ret # 5 byte nop test .align 4, 0 # start with 16 byte alignment filled with zeros @@ -39,7 +39,7 @@ ret # nopl 0(%[re]ax,%[re]ax,1) # 0x0f, 0x1f, 0x44, 0x00, 0x00 - .align 3, 0x90 + .align 3 ret # 6 byte nop test .align 4, 0 # start with 16 byte alignment filled with zeros @@ -47,14 +47,14 @@ ret # nopw 0(%[re]ax,%[re]ax,1) # 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00 - .align 3, 0x90 + .align 3 ret # 7 byte nop test .align 4, 0 # start with 16 byte alignment filled with zeros ret # nopl 0L(%[re]ax) # 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00 - .align 3, 0x90 + .align 3 ret # 8 byte nop test .align 4, 0 # start with 16 byte alignment filled with zeros @@ -68,7 +68,7 @@ ret # nopl 0L(%[re]ax,%[re]ax,1) # 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 - .align 3, 0x90 + .align 3 ret # 9 byte nop test .align 4, 0 # start with 16 byte alignment filled with zeros @@ -81,7 +81,7 @@ ret # nopw 0L(%[re]ax,%[re]ax,1) # 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 - .align 4, 0x90 + .align 4 ret # 10 byte nop test .align 4, 0 # start with 16 byte alignment filled with zeros @@ -94,7 +94,7 @@ ret # nopw %cs:0L(%[re]ax,%[re]ax,1) # 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 - .align 4, 0x90 + .align 4 ret # 11 byte nop test .align 4, 0 # start with 16 byte alignment filled with zeros @@ -105,7 +105,7 @@ ret # nopw %cs:0L(%[re]ax,%[re]ax,1) # 0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 - .align 4, 0x90 + .align 4 ret # 12 byte nop test .align 4, 0 # start with 16 byte alignment filled with zeros @@ -117,7 +117,7 @@ # nopw 0(%[re]ax,%[re]ax,1) # 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00, # 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00 - .align 4, 0x90 + .align 4 ret # 13 byte nop test .align 4, 0 # start with 16 byte alignment filled with zeros @@ -128,7 +128,7 @@ # nopl 0L(%[re]ax) # 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00, # 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00 - .align 4, 0x90 + .align 4 ret # 14 byte nop test .align 4, 0 # start with 16 byte alignment filled with zeros @@ -138,7 +138,7 @@ # nopl 0L(%[re]ax) # 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00, # 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00 - .align 4, 0x90 + .align 4 ret # 15 byte nop test .align 4, 0 # start with 16 byte alignment filled with zeros @@ -147,7 +147,7 @@ # nopl 0L(%[re]ax,%[re]ax,1) # 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00, # 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 - .align 4, 0x90 + .align 4 ret # Only the .text sections gets optimal nops. diff --git a/llvm/test/MC/RISCV/align-fill-byte-zero.s b/llvm/test/MC/RISCV/align-fill-byte-zero.s new file mode 100644 index 0000000000000..cad881ea4e82c --- /dev/null +++ b/llvm/test/MC/RISCV/align-fill-byte-zero.s @@ -0,0 +1,29 @@ +// RUN: llvm-mc -triple riscv32 %s -o - | FileCheck %s --check-prefix=ASM +// RUN: llvm-mc -triple riscv32 -filetype obj %s -o - | \ +// RUN: llvm-objdump -dz - | FileCheck %s --check-prefix=OBJ + +// RUN: llvm-mc -triple riscv64 %s -o - | FileCheck %s --check-prefix=ASM +// RUN: llvm-mc -triple riscv64 -filetype obj %s -o - | \ +// RUN: llvm-objdump -dz - | FileCheck %s --check-prefix=OBJ + +// llvm.org/pr30955 - LLVM was handling `.balign , 0` strangely on +// non-x86 targets. + + .text + +// ASM: addi a0, a0, 1 +// OBJ: 00150513 addi a0, a0, 0x1 + addi a0, a0, 0x1 + +// ASM: .p2align 4, 0x0 +// OBJ-NEXT: 0000 +// OBJ-NEXT: 0000 +// OBJ-NEXT: 0000 +// OBJ-NEXT: 0000 +// OBJ-NEXT: 0000 +// OBJ-NEXT: 0000 + .balign 0x10, 0 + +// ASM: addi a0, a0, 1 +// OBJ-NEXT: 00150513 addi a0, a0, 0x1 + addi a0, a0, 0x1 diff --git a/llvm/test/MC/X86/code16gcc-align.s b/llvm/test/MC/X86/code16gcc-align.s index 657364a604275..107986a9ec70e 100644 --- a/llvm/test/MC/X86/code16gcc-align.s +++ b/llvm/test/MC/X86/code16gcc-align.s @@ -21,19 +21,19 @@ .text .code16gcc .globl test - .p2align 4, 0x90 + .p2align 4 .type test,@function test: .nops 34 movl %eax, %edi xorl %ebx, %ebx - .p2align 4, 0x90 + .p2align 4 movzbl (%esi,%ebx), %ecx calll called .nops 3 retl - .p2align 4, 0x90 + .p2align 4 .type called,@function called: .nops 1