@@ -396,6 +396,7 @@ RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,
   case R_X86_64_REX_GOTPCRELX:
   case R_X86_64_CODE_4_GOTPCRELX:
   case R_X86_64_GOTTPOFF:
+  case R_X86_64_CODE_4_GOTTPOFF:
     return R_GOT_PC;
   case R_X86_64_GOTOFF64:
     return R_GOTPLTREL;
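For context on why a second relocation type is needed at all: when the register operand of the GOT-indirect TLS access is one of the APX extended registers (%r16-%r31), the instruction carries the two-byte REX2 prefix and the assembler emits R_X86_64_CODE_4_GOTTPOFF (four instruction bytes precede the relocated field) rather than R_X86_64_GOTTPOFF; for address computation both are plain GOT-relative references, hence the shared R_GOT_PC classification above. The following standalone sketch is not lld code and the helper name is made up; it only illustrates the byte-level distinction under that assumption:

```cpp
#include <cstdint>

enum class TlsIeReloc { Gottpoff, Code4Gottpoff };

// Given a pointer to the 32-bit displacement of a GOT-indirect TLS load/add,
// guess which GOTTPOFF flavor the assembler would have emitted: the CODE_4
// form applies when the ModRM byte is preceded by the two-byte REX2 prefix
// (escape byte 0xd5), e.g. for destinations %r16-%r31.
inline TlsIeReloc classifyTlsIeSite(const uint8_t *loc) {
  return loc[-4] == 0xd5 ? TlsIeReloc::Code4Gottpoff : TlsIeReloc::Gottpoff;
}
```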
@@ -547,44 +548,58 @@ void X86_64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
   }
 }
 
-// In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to
-// R_X86_64_TPOFF32 so that it does not use GOT.
+// In some conditions, R_X86_64_GOTTPOFF/R_X86_64_CODE_4_GOTTPOFF relocation can
+// be optimized to R_X86_64_TPOFF32 so that it does not use GOT.
 void X86_64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
                             uint64_t val) const {
   uint8_t *inst = loc - 3;
   uint8_t reg = loc[-1] >> 3;
   uint8_t *regSlot = loc - 1;
 
-  // Note that ADD with RSP or R12 is converted to ADD instead of LEA
-  // because LEA with these registers needs 4 bytes to encode and thus
-  // wouldn't fit the space.
-
-  if (memcmp(inst, "\x48\x03\x25", 3) == 0) {
-    // "addq foo@gottpoff(%rip),%rsp" -> "addq $foo,%rsp"
-    memcpy(inst, "\x48\x81\xc4", 3);
-  } else if (memcmp(inst, "\x4c\x03\x25", 3) == 0) {
-    // "addq foo@gottpoff(%rip),%r12" -> "addq $foo,%r12"
-    memcpy(inst, "\x49\x81\xc4", 3);
-  } else if (memcmp(inst, "\x4c\x03", 2) == 0) {
-    // "addq foo@gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]"
-    memcpy(inst, "\x4d\x8d", 2);
-    *regSlot = 0x80 | (reg << 3) | reg;
-  } else if (memcmp(inst, "\x48\x03", 2) == 0) {
-    // "addq foo@gottpoff(%rip),%reg" -> "leaq foo(%reg),%reg"
-    memcpy(inst, "\x48\x8d", 2);
-    *regSlot = 0x80 | (reg << 3) | reg;
-  } else if (memcmp(inst, "\x4c\x8b", 2) == 0) {
-    // "movq foo@gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]"
-    memcpy(inst, "\x49\xc7", 2);
-    *regSlot = 0xc0 | reg;
-  } else if (memcmp(inst, "\x48\x8b", 2) == 0) {
-    // "movq foo@gottpoff(%rip),%reg" -> "movq $foo,%reg"
-    memcpy(inst, "\x48\xc7", 2);
-    *regSlot = 0xc0 | reg;
+  if (rel.type == R_X86_64_GOTTPOFF) {
+    // Note that ADD with RSP or R12 is converted to ADD instead of LEA
+    // because LEA with these registers needs 4 bytes to encode and thus
+    // wouldn't fit the space.
+
+    if (memcmp(inst, "\x48\x03\x25", 3) == 0) {
+      // "addq foo@gottpoff(%rip),%rsp" -> "addq $foo,%rsp"
+      memcpy(inst, "\x48\x81\xc4", 3);
+    } else if (memcmp(inst, "\x4c\x03\x25", 3) == 0) {
+      // "addq foo@gottpoff(%rip),%r12" -> "addq $foo,%r12"
+      memcpy(inst, "\x49\x81\xc4", 3);
+    } else if (memcmp(inst, "\x4c\x03", 2) == 0) {
+      // "addq foo@gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]"
+      memcpy(inst, "\x4d\x8d", 2);
+      *regSlot = 0x80 | (reg << 3) | reg;
+    } else if (memcmp(inst, "\x48\x03", 2) == 0) {
+      // "addq foo@gottpoff(%rip),%reg" -> "leaq foo(%reg),%reg"
+      memcpy(inst, "\x48\x8d", 2);
+      *regSlot = 0x80 | (reg << 3) | reg;
+    } else if (memcmp(inst, "\x4c\x8b", 2) == 0) {
+      // "movq foo@gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]"
+      memcpy(inst, "\x49\xc7", 2);
+      *regSlot = 0xc0 | reg;
+    } else if (memcmp(inst, "\x48\x8b", 2) == 0) {
+      // "movq foo@gottpoff(%rip),%reg" -> "movq $foo,%reg"
+      memcpy(inst, "\x48\xc7", 2);
+      *regSlot = 0xc0 | reg;
+    } else {
+      ErrAlways(ctx)
+          << getErrorLoc(ctx, loc - 3)
+          << "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only";
+    }
   } else {
-    ErrAlways(ctx)
-        << getErrorLoc(ctx, loc - 3)
-        << "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only";
+    assert(rel.type == R_X86_64_CODE_4_GOTTPOFF &&
+           "Unsupported relocation type!");
+    assert((loc[-4] == 0xd5) &&
+           "Invalid prefix with R_X86_64_CODE_4_GOTTPOFF!");
+    const uint8_t rex = loc[-3];
+    loc[-3] = (rex & ~0x44) | (rex & 0x44) >> 2;
+    *regSlot = 0xc0 | reg;
+    if (loc[-2] == 0x8b)
+      loc[-2] = 0xc7;
+    else
+      loc[-2] = 0x81;
   }
 
   // The original code used a PC relative relocation.
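The new else branch above performs the same load/add-to-immediate conversion as the REX-prefixed cases, but instead of overwriting fixed byte patterns it rewrites the REX2 payload: the destination register moves from ModRM.reg to ModRM.rm, so the R3/R4 bits (mask 0x44) are shifted down into the B3/B4 positions, and the opcode is replaced by the corresponding immediate form. A minimal standalone sketch of that rewrite follows; it is not lld code, and the helper name, driver, and sample %r16 encoding are illustrative assumptions:

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>

// Rewrite the REX2 payload, opcode, and ModRM byte in place. `loc` points at
// the 32-bit displacement, so the instruction bytes are loc[-4] = 0xd5 (REX2
// escape), loc[-3] = REX2 payload, loc[-2] = opcode, loc[-1] = ModRM.
static void relaxIeToLeRex2(uint8_t *loc) {
  assert(loc[-4] == 0xd5 && "expected a REX2-prefixed instruction");
  uint8_t reg = loc[-1] >> 3;                  // ModRM.reg (mod is 00 in the RIP-relative form)
  uint8_t rex = loc[-3];
  loc[-3] = (rex & ~0x44) | (rex & 0x44) >> 2; // R3/R4 -> B3/B4: the register now lives in ModRM.rm
  loc[-1] = 0xc0 | reg;                        // register-direct ModRM, rm = former reg
  loc[-2] = loc[-2] == 0x8b ? 0xc7 : 0x81;     // mov load -> mov $imm32; add -> add $imm32
}

int main() {
  // "movq foo@gottpoff(%rip), %r16" encodes (I believe) as d5 48 8b 05 <disp32>.
  uint8_t insn[] = {0xd5, 0x48, 0x8b, 0x05, 0, 0, 0, 0};
  relaxIeToLeRex2(insn + 4);
  // Expected output: "d5 18 c7 c0", i.e. "movq $imm32, %r16".
  std::printf("%02x %02x %02x %02x\n", insn[0], insn[1], insn[2], insn[3]);
}
```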
@@ -741,6 +756,7 @@ int64_t X86_64::getImplicitAddend(const uint8_t *buf, RelType type) const {
   case R_X86_64_CODE_4_GOTPCRELX:
   case R_X86_64_PC32:
   case R_X86_64_GOTTPOFF:
+  case R_X86_64_CODE_4_GOTTPOFF:
   case R_X86_64_PLT32:
   case R_X86_64_TLSGD:
   case R_X86_64_TLSLD:
@@ -850,6 +866,7 @@ void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
     }
     break;
   case R_X86_64_GOTTPOFF:
+  case R_X86_64_CODE_4_GOTTPOFF:
    if (rel.expr == R_RELAX_TLS_IE_TO_LE) {
      relaxTlsIeToLe(loc, rel, val);
    } else {