diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp index 2dcce5c224d5d..914e1e727981c 100644 --- a/lld/ELF/Arch/X86_64.cpp +++ b/lld/ELF/Arch/X86_64.cpp @@ -396,6 +396,7 @@ RelExpr X86_64::getRelExpr(RelType type, const Symbol &s, case R_X86_64_REX_GOTPCRELX: case R_X86_64_CODE_4_GOTPCRELX: case R_X86_64_GOTTPOFF: + case R_X86_64_CODE_4_GOTTPOFF: return R_GOT_PC; case R_X86_64_GOTOFF64: return R_GOTPLTREL; @@ -547,44 +548,69 @@ void X86_64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, } } -// In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to -// R_X86_64_TPOFF32 so that it does not use GOT. +// In some conditions, R_X86_64_GOTTPOFF/R_X86_64_CODE_4_GOTTPOFF relocation can +// be optimized to R_X86_64_TPOFF32 so that it does not use GOT. void X86_64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const { uint8_t *inst = loc - 3; uint8_t reg = loc[-1] >> 3; uint8_t *regSlot = loc - 1; - // Note that ADD with RSP or R12 is converted to ADD instead of LEA - // because LEA with these registers needs 4 bytes to encode and thus - // wouldn't fit the space. 
- - if (memcmp(inst, "\x48\x03\x25", 3) == 0) { - // "addq foo@gottpoff(%rip),%rsp" -> "addq $foo,%rsp" - memcpy(inst, "\x48\x81\xc4", 3); - } else if (memcmp(inst, "\x4c\x03\x25", 3) == 0) { - // "addq foo@gottpoff(%rip),%r12" -> "addq $foo,%r12" - memcpy(inst, "\x49\x81\xc4", 3); - } else if (memcmp(inst, "\x4c\x03", 2) == 0) { - // "addq foo@gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]" - memcpy(inst, "\x4d\x8d", 2); - *regSlot = 0x80 | (reg << 3) | reg; - } else if (memcmp(inst, "\x48\x03", 2) == 0) { - // "addq foo@gottpoff(%rip),%reg -> "leaq foo(%reg),%reg" - memcpy(inst, "\x48\x8d", 2); - *regSlot = 0x80 | (reg << 3) | reg; - } else if (memcmp(inst, "\x4c\x8b", 2) == 0) { - // "movq foo@gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]" - memcpy(inst, "\x49\xc7", 2); - *regSlot = 0xc0 | reg; - } else if (memcmp(inst, "\x48\x8b", 2) == 0) { - // "movq foo@gottpoff(%rip),%reg" -> "movq $foo,%reg" - memcpy(inst, "\x48\xc7", 2); + if (rel.type == R_X86_64_GOTTPOFF) { + // Note that ADD with RSP or R12 is converted to ADD instead of LEA + // because LEA with these registers needs 4 bytes to encode and thus + // wouldn't fit the space. 
+ + if (memcmp(inst, "\x48\x03\x25", 3) == 0) { + // "addq foo@gottpoff(%rip),%rsp" -> "addq $foo,%rsp" + memcpy(inst, "\x48\x81\xc4", 3); + } else if (memcmp(inst, "\x4c\x03\x25", 3) == 0) { + // "addq foo@gottpoff(%rip),%r12" -> "addq $foo,%r12" + memcpy(inst, "\x49\x81\xc4", 3); + } else if (memcmp(inst, "\x4c\x03", 2) == 0) { + // "addq foo@gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]" + memcpy(inst, "\x4d\x8d", 2); + *regSlot = 0x80 | (reg << 3) | reg; + } else if (memcmp(inst, "\x48\x03", 2) == 0) { + // "addq foo@gottpoff(%rip),%reg -> "leaq foo(%reg),%reg" + memcpy(inst, "\x48\x8d", 2); + *regSlot = 0x80 | (reg << 3) | reg; + } else if (memcmp(inst, "\x4c\x8b", 2) == 0) { + // "movq foo@gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]" + memcpy(inst, "\x49\xc7", 2); + *regSlot = 0xc0 | reg; + } else if (memcmp(inst, "\x48\x8b", 2) == 0) { + // "movq foo@gottpoff(%rip),%reg" -> "movq $foo,%reg" + memcpy(inst, "\x48\xc7", 2); + *regSlot = 0xc0 | reg; + } else { + Err(ctx) + << getErrorLoc(ctx, loc - 3) + << "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only"; + } + } else if (rel.type == R_X86_64_CODE_4_GOTTPOFF) { + if (loc[-4] != 0xd5) { + Err(ctx) << getErrorLoc(ctx, loc - 4) + << "Invalid prefix with R_X86_64_CODE_4_GOTTPOFF!"; + return; + } + const uint8_t rex = loc[-3]; + loc[-3] = (rex & ~0x44) | (rex & 0x44) >> 2; *regSlot = 0xc0 | reg; + + if (loc[-2] == 0x8b) { + // "movq foo@gottpoff(%rip),%r[16-31]" -> "movq $foo,%r[16-31]" + loc[-2] = 0xc7; + } else if (loc[-2] == 0x03) { + // "addq foo@gottpoff(%rip),%r[16-31]" -> "addq $foo,%r[16-31]" + loc[-2] = 0x81; + } else { + Err(ctx) << getErrorLoc(ctx, loc - 4) + << "R_X86_64_CODE_4_GOTTPOFF must be used in MOVQ or ADDQ " + "instructions only"; + } } else { - ErrAlways(ctx) - << getErrorLoc(ctx, loc - 3) - << "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only"; + llvm_unreachable("Unsupported relocation type!"); } // The original code used a PC relative 
relocation. @@ -741,6 +767,7 @@ int64_t X86_64::getImplicitAddend(const uint8_t *buf, RelType type) const { case R_X86_64_CODE_4_GOTPCRELX: case R_X86_64_PC32: case R_X86_64_GOTTPOFF: + case R_X86_64_CODE_4_GOTTPOFF: case R_X86_64_PLT32: case R_X86_64_TLSGD: case R_X86_64_TLSLD: @@ -850,6 +877,7 @@ void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { } break; case R_X86_64_GOTTPOFF: + case R_X86_64_CODE_4_GOTTPOFF: if (rel.expr == R_RELAX_TLS_IE_TO_LE) { relaxTlsIeToLe(loc, rel, val); } else { diff --git a/lld/test/ELF/pack-dyn-relocs-tls-x86-64.s b/lld/test/ELF/pack-dyn-relocs-tls-x86-64.s index b3b1e8a060277..c6464b4bece09 100644 --- a/lld/test/ELF/pack-dyn-relocs-tls-x86-64.s +++ b/lld/test/ELF/pack-dyn-relocs-tls-x86-64.s @@ -12,12 +12,16 @@ .globl foo foo: movq tlsvar@GOTTPOFF(%rip), %rcx + movq tlsvar2@GOTTPOFF(%rip), %r31 + .section .tdata,"awT",@progbits .space 0x1234 tlsvar: .word 42 - +tlsvar2: + .word 42 // CHECK: Section ({{.+}}) .rela.dyn { // CHECK-NEXT: R_X86_64_TPOFF64 - 0x1234 +// CHECK-NEXT: R_X86_64_TPOFF64 - 0x1236 // CHECK-NEXT: } diff --git a/lld/test/ELF/tls-opt.s b/lld/test/ELF/tls-opt.s index ce90ba4f869ce..818203ee19cb7 100644 --- a/lld/test/ELF/tls-opt.s +++ b/lld/test/ELF/tls-opt.s @@ -20,6 +20,12 @@ // DISASM-NEXT: leaq -4(%r15), %r15 // DISASM-NEXT: addq $-4, %rsp // DISASM-NEXT: addq $-4, %r12 +// DISASM-NEXT: movq $-8, %r16 +// DISASM-NEXT: movq $-8, %r20 +// DISASM-NEXT: movq $-4, %r16 +// DISASM-NEXT: addq $-8, %r16 +// DISASM-NEXT: addq $-8, %r28 +// DISASM-NEXT: addq $-4, %r16 // LD to LE: // DISASM-NEXT: movq %fs:0, %rax @@ -69,6 +75,13 @@ _start: addq tls1@GOTTPOFF(%rip), %r15 addq tls1@GOTTPOFF(%rip), %rsp addq tls1@GOTTPOFF(%rip), %r12 + # EGPR + movq tls0@GOTTPOFF(%rip), %r16 + movq tls0@GOTTPOFF(%rip), %r20 + movq tls1@GOTTPOFF(%rip), %r16 + addq tls0@GOTTPOFF(%rip), %r16 + addq tls0@GOTTPOFF(%rip), %r28 + addq tls1@GOTTPOFF(%rip), %r16 // LD to LE leaq tls0@tlsld(%rip), %rdi diff --git 
a/lld/test/ELF/x86-64-tls-ie-local.s b/lld/test/ELF/x86-64-tls-ie-local.s index c527c86e66771..340a654ef9c28 100644 --- a/lld/test/ELF/x86-64-tls-ie-local.s +++ b/lld/test/ELF/x86-64-tls-ie-local.s @@ -5,21 +5,29 @@ # RUN: llvm-readobj -r %t.so | FileCheck --check-prefix=REL %s # RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t.so | FileCheck %s -# SEC: .got PROGBITS 0000000000002338 000338 000010 00 WA 0 0 8 +# SEC: .got PROGBITS 0000000000002348 000348 000010 00 WA 0 0 8 ## Dynamic relocations for non-preemptable symbols in a shared object have section index 0. # REL: .rela.dyn { -# REL-NEXT: 0x2338 R_X86_64_TPOFF64 - 0x0 -# REL-NEXT: 0x2340 R_X86_64_TPOFF64 - 0x4 +# REL-NEXT: 0x2348 R_X86_64_TPOFF64 - 0x0 +# REL-NEXT: 0x2350 R_X86_64_TPOFF64 - 0x4 # REL-NEXT: } -## &.got[0] - 0x127f = 0x2338 - 0x127f = 4281 -## &.got[1] - 0x1286 = 0x2340 - 0x1286 = 4282 -# CHECK: 1278: addq 4281(%rip), %rax -# CHECK-NEXT: 127f: addq 4282(%rip), %rax +## &.got[0] - 0x127f = 0x2348 - 0x127f = 4297 +## &.got[1] - 0x1286 = 0x2350 - 0x1286 = 4298 +## &.got[0] - 0x128e = 0x2348 - 0x128e = 4282 +## &.got[1] - 0x1296 = 0x2350 - 0x1296 = 4282 + +# CHECK: 1278: addq 4297(%rip), %rax +# CHECK-NEXT: 127f: addq 4298(%rip), %rax +# CHECK-NEXT: 1286: addq 4282(%rip), %r16 +# CHECK-NEXT: 128e: addq 4282(%rip), %r16 addq foo@GOTTPOFF(%rip), %rax addq bar@GOTTPOFF(%rip), %rax +addq foo@GOTTPOFF(%rip), %r16 +addq bar@GOTTPOFF(%rip), %r16 + .section .tbss,"awT",@nobits foo: