@@ -231,36 +231,71 @@ static void writePltHeaderLong(uint8_t *buf) {
 // The default PLT header requires the .got.plt to be within 128 Mb of the
 // .plt in the positive direction.
 void ARM::writePltHeader(uint8_t *buf) const {
-  // Use a similar sequence to that in writePlt(), the difference is the calling
-  // conventions mean we use lr instead of ip. The PLT entry is responsible for
-  // saving lr on the stack, the dynamic loader is responsible for reloading
-  // it.
-  const uint32_t pltData[] = {
-      0xe52de004, // L1: str lr, [sp,#-4]!
-      0xe28fe600, //     add lr, pc,  #0x0NN00000 &(.got.plt - L1 - 4)
-      0xe28eea00, //     add lr, lr,  #0x000NN000 &(.got.plt - L1 - 4)
-      0xe5bef000, //     ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4)
-  };
-
-  uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 4;
-  if (!llvm::isUInt<27>(offset)) {
-    // We cannot encode the Offset, use the long form.
-    writePltHeaderLong(buf);
-    return;
+  if (config->armThumbPLTs) {
+    // The instruction sequence for thumb:
+    //
+    // 0: b500          push    {lr}
+    // 2: f8df e008     ldr.w   lr, [pc, #0x8]  @ 0xe <func+0xe>
+    // 6: 44fe          add     lr, pc
+    // 8: f85e ff08     ldr     pc, [lr, #8]!
+    // e: .word .got.plt - .plt - 16
+    //
+    // At 0x8, we want to jump to .got.plt, the -16 accounts for 8 bytes from
+    // `pc` in the add instruction and 8 bytes for the `lr` adjustment.
+    //
+    uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 16;
+    assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset");
+    write16(buf + 0, 0xb500);
+    // Split into two halves to support endianness correctly.
+    write16(buf + 2, 0xf8df);
+    write16(buf + 4, 0xe008);
+    write16(buf + 6, 0x44fe);
+    // Split into two halves to support endianness correctly.
+    write16(buf + 8, 0xf85e);
+    write16(buf + 10, 0xff08);
+    write32(buf + 12, offset);
+
+    memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary
+    memcpy(buf + 20, trapInstr.data(), 4);
+    memcpy(buf + 24, trapInstr.data(), 4);
+    memcpy(buf + 28, trapInstr.data(), 4);
+  } else {
+    // Use a similar sequence to that in writePlt(), the difference is the
+    // calling conventions mean we use lr instead of ip. The PLT entry is
+    // responsible for saving lr on the stack, the dynamic loader is responsible
+    // for reloading it.
+    const uint32_t pltData[] = {
+        0xe52de004, // L1: str lr, [sp,#-4]!
+        0xe28fe600, //     add lr, pc,  #0x0NN00000 &(.got.plt - L1 - 4)
+        0xe28eea00, //     add lr, lr,  #0x000NN000 &(.got.plt - L1 - 4)
+        0xe5bef000, //     ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4)
+    };
+
+    uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 4;
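+    // isUInt<27> corresponds to the "within 128 Mb" (2^27 bytes) positive
+    // range mentioned in the comment above this function.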
+    if (!llvm::isUInt<27>(offset)) {
+      // We cannot encode the Offset, use the long form.
+      writePltHeaderLong(buf);
+      return;
+    }
+    write32(buf + 0, pltData[0]);
+    write32(buf + 4, pltData[1] | ((offset >> 20) & 0xff));
+    write32(buf + 8, pltData[2] | ((offset >> 12) & 0xff));
+    write32(buf + 12, pltData[3] | (offset & 0xfff));
+    memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary
+    memcpy(buf + 20, trapInstr.data(), 4);
+    memcpy(buf + 24, trapInstr.data(), 4);
+    memcpy(buf + 28, trapInstr.data(), 4);
   }
-  write32(buf + 0, pltData[0]);
-  write32(buf + 4, pltData[1] | ((offset >> 20) & 0xff));
-  write32(buf + 8, pltData[2] | ((offset >> 12) & 0xff));
-  write32(buf + 12, pltData[3] | (offset & 0xfff));
-  memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary
-  memcpy(buf + 20, trapInstr.data(), 4);
-  memcpy(buf + 24, trapInstr.data(), 4);
-  memcpy(buf + 28, trapInstr.data(), 4);
 }

 void ARM::addPltHeaderSymbols(InputSection &isec) const {
-  addSyntheticLocal("$a", STT_NOTYPE, 0, 0, isec);
-  addSyntheticLocal("$d", STT_NOTYPE, 16, 0, isec);
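+  // "$a", "$t" and "$d" are ELF for the Arm Architecture mapping symbols,
+  // marking where Arm code, Thumb code and data start within the PLT header.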
+  if (config->armThumbPLTs) {
+    addSyntheticLocal("$t", STT_NOTYPE, 0, 0, isec);
+    addSyntheticLocal("$d", STT_NOTYPE, 12, 0, isec);
+  } else {
+    addSyntheticLocal("$a", STT_NOTYPE, 0, 0, isec);
+    addSyntheticLocal("$d", STT_NOTYPE, 16, 0, isec);
+  }
 }

 // Long form PLT entries that do not have any restrictions on the displacement
@@ -279,32 +314,65 @@ static void writePltLong(uint8_t *buf, uint64_t gotPltEntryAddr,
 // .plt in the positive direction.
 void ARM::writePlt(uint8_t *buf, const Symbol &sym,
                    uint64_t pltEntryAddr) const {
-  // The PLT entry is similar to the example given in Appendix A of ELF for
-  // the Arm Architecture. Instead of using the Group Relocations to find the
-  // optimal rotation for the 8-bit immediate used in the add instructions we
-  // hard code the most compact rotations for simplicity. This saves a load
-  // instruction over the long plt sequences.
-  const uint32_t pltData[] = {
-      0xe28fc600, // L1: add ip, pc,  #0x0NN00000  Offset(&(.got.plt) - L1 - 8
-      0xe28cca00, //     add ip, ip,  #0x000NN000  Offset(&(.got.plt) - L1 - 8
-      0xe5bcf000, //     ldr pc, [ip, #0x00000NNN] Offset(&(.got.plt) - L1 - 8
-  };

-  uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 8;
-  if (!llvm::isUInt<27>(offset)) {
-    // We cannot encode the Offset, use the long form.
-    writePltLong(buf, sym.getGotPltVA(), pltEntryAddr);
-    return;
+  if (!config->armThumbPLTs) {
+    uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 8;
+
+    // The PLT entry is similar to the example given in Appendix A of ELF for
+    // the Arm Architecture. Instead of using the Group Relocations to find the
+    // optimal rotation for the 8-bit immediate used in the add instructions we
+    // hard code the most compact rotations for simplicity. This saves a load
+    // instruction over the long plt sequences.
+    const uint32_t pltData[] = {
+        0xe28fc600, // L1: add ip, pc,  #0x0NN00000  Offset(&(.got.plt) - L1 - 8
+        0xe28cca00, //     add ip, ip,  #0x000NN000  Offset(&(.got.plt) - L1 - 8
+        0xe5bcf000, //     ldr pc, [ip, #0x00000NNN] Offset(&(.got.plt) - L1 - 8
+    };
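+    // For a hypothetical offset of 0x00123456 the encoded sequence would be:
+    //   add ip, pc, #0x00100000   ; NN  = (offset >> 20) & 0xff = 0x01
+    //   add ip, ip, #0x00023000   ; NN  = (offset >> 12) & 0xff = 0x23
+    //   ldr pc, [ip, #0x456]      ; NNN = offset & 0xfff = 0x456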
+    if (!llvm::isUInt<27>(offset)) {
+      // We cannot encode the Offset, use the long form.
+      writePltLong(buf, sym.getGotPltVA(), pltEntryAddr);
+      return;
+    }
+    write32(buf + 0, pltData[0] | ((offset >> 20) & 0xff));
+    write32(buf + 4, pltData[1] | ((offset >> 12) & 0xff));
+    write32(buf + 8, pltData[2] | (offset & 0xfff));
+    memcpy(buf + 12, trapInstr.data(), 4); // Pad to 16-byte boundary
+  } else {
+    uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 12;
+    assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset");
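+    // At the `add ip, pc` (offset 8 within the entry) the pc operand reads as
+    // entry + 12, so biasing the offset by -12 makes ip end up pointing at the
+    // .got.plt slot, which `ldr.w pc, [ip]` then loads.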
+
+    // A PLT entry will be:
+    //
+    //       movw ip, #<lower 16 bits>
+    //       movt ip, #<upper 16 bits>
+    //       add ip, pc
+    //   L1: ldr.w pc, [ip]
+    //       b L1
+    //
+    // where ip = r12 = 0xc
+
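+    // Only the Rd field (ip) of the movw/movt encodings is written directly
+    // below; relocateNoSym() supplies the rest of each instruction along with
+    // the :lower16:/:upper16: halves of `offset`.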
+    // movw ip, #<lower 16 bits>
+    write16(buf + 2, 0x0c00); // use `ip`
+    relocateNoSym(buf, R_ARM_THM_MOVW_ABS_NC, offset);
+
+    // movt ip, #<upper 16 bits>
+    write16(buf + 6, 0x0c00); // use `ip`
+    relocateNoSym(buf + 4, R_ARM_THM_MOVT_ABS, offset);
+
+    write16(buf + 8, 0x44fc);  // add ip, pc
+    write16(buf + 10, 0xf8dc); // ldr.w pc, [ip] (bottom half)
+    write16(buf + 12, 0xf000); // ldr.w pc, [ip] (upper half)
+    write16(buf + 14, 0xe7fc); // Branch to previous instruction
   }
-  write32(buf + 0, pltData[0] | ((offset >> 20) & 0xff));
-  write32(buf + 4, pltData[1] | ((offset >> 12) & 0xff));
-  write32(buf + 8, pltData[2] | (offset & 0xfff));
-  memcpy(buf + 12, trapInstr.data(), 4); // Pad to 16-byte boundary
 }

 void ARM::addPltSymbols(InputSection &isec, uint64_t off) const {
-  addSyntheticLocal("$a", STT_NOTYPE, off, 0, isec);
-  addSyntheticLocal("$d", STT_NOTYPE, off + 12, 0, isec);
+  if (config->armThumbPLTs) {
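+    // The Thumb PLT entry is 16 bytes of instructions with no literal pool,
+    // so only a "$t" mapping symbol is needed.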
+    addSyntheticLocal("$t", STT_NOTYPE, off, 0, isec);
+  } else {
+    addSyntheticLocal("$a", STT_NOTYPE, off, 0, isec);
+    addSyntheticLocal("$d", STT_NOTYPE, off + 12, 0, isec);
+  }
 }

 bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
@@ -325,6 +393,8 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
   case R_ARM_JUMP24:
     // Source is ARM, all PLT entries are ARM so no interworking required.
     // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 set (Thumb).
+    assert(!config->armThumbPLTs &&
+           "If the source is ARM, we should not need Thumb PLTs");
     if (s.isFunc() && expr == R_PC && (s.getVA() & 1))
       return true;
     [[fallthrough]];
@@ -335,9 +405,9 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
   }
   case R_ARM_THM_JUMP19:
   case R_ARM_THM_JUMP24:
-    // Source is Thumb, all PLT entries are ARM so interworking is required.
+    // Source is Thumb, when all PLT entries are ARM interworking is required.
     // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 clear (ARM).
-    if (expr == R_PLT_PC || (s.isFunc() && (s.getVA() & 1) == 0))
+    if ((expr == R_PLT_PC && !config->armThumbPLTs) || (s.isFunc() && (s.getVA() & 1) == 0))
       return true;
     [[fallthrough]];
   case R_ARM_THM_CALL: {
@@ -547,7 +617,6 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
     // STT_FUNC we choose whether to write a BL or BLX depending on the
     // value of bit 0 of Val. With bit 0 == 1 denoting Thumb. If the symbol is
     // not of type STT_FUNC then we must preserve the original instruction.
-    // PLT entries are always ARM state so we know we don't need to interwork.
     assert(rel.sym); // R_ARM_CALL is always reached via relocate().
     bool bit0Thumb = val & 1;
     bool isBlx = (read32(loc) & 0xfe000000) == 0xfa000000;
@@ -606,12 +675,13 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
     // PLT entries are always ARM state so we know we need to interwork.
     assert(rel.sym); // R_ARM_THM_CALL is always reached via relocate().
     bool bit0Thumb = val & 1;
+    bool useThumb = bit0Thumb || config->armThumbPLTs;
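+    // When the PLT entries themselves are Thumb (config->armThumbPLTs), a call
+    // routed through the PLT must stay in Thumb state, so it is treated the
+    // same as a destination with bit 0 set.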
     bool isBlx = (read16(loc + 2) & 0x1000) == 0;
     // lld 10.0 and before always used bit0Thumb when deciding to write a BLX
-    // even when type not STT_FUNC. PLT entries generated by LLD are always ARM.
-    if (!rel.sym->isFunc() && !rel.sym->isInPlt() && isBlx == bit0Thumb)
+    // even when type not STT_FUNC.
+    if (!rel.sym->isFunc() && !rel.sym->isInPlt() && isBlx == useThumb)
       stateChangeWarning(loc, rel.type, *rel.sym);
-    if (rel.sym->isFunc() || rel.sym->isInPlt() ? !bit0Thumb : isBlx) {
+    if ((rel.sym->isFunc() || rel.sym->isInPlt()) ? !useThumb : isBlx) {
       // We are writing a BLX. Ensure BLX destination is 4-byte aligned. As
       // the BLX instruction may only be two byte aligned. This must be done
       // before overflow check.