@@ -575,28 +575,89 @@ IndirectBranchType BinaryFunction::processIndirectBranch(MCInst &Instruction,
575
575
int64_t DispValue;
576
576
const MCExpr *DispExpr;
577
577
578
+ // In AArch64, identify the instruction adding the PC-relative offset to
579
+ // jump table entries to correctly decode it.
580
+ MCInst *PCRelBaseInstr;
581
+ uint64_t PCRelAddr = 0 ;
582
+
583
+ MutableArrayRef<MCInst> BB = Instructions;
584
+
585
+ if (BC.TheTriple ->getArch () == llvm::Triple::aarch64) {
586
+ PreserveNops = opts::Relocs;
587
+ // Start at the last label as an approximation of the current basic block.
588
+ // This is a heuristic, since the full set of labels has yet to be
589
+ // determined.
590
+ for (auto LI = Labels.rbegin (); LI != Labels.rend (); ++LI) {
591
+ auto II = InstructionOffsets.find (LI->first );
592
+ if (II != InstructionOffsets.end ()) {
593
+ BB = BB.slice (II->second );
594
+ break ;
595
+ }
596
+ }
597
+ }
598
+
578
599
auto Type = BC.MIA ->analyzeIndirectBranch (Instruction,
579
- Instructions ,
600
+ BB ,
580
601
PtrSize,
581
602
MemLocInstr,
582
603
BaseRegNum,
583
604
IndexRegNum,
584
605
DispValue,
585
- DispExpr);
606
+ DispExpr,
607
+ PCRelBaseInstr);
586
608
587
609
if (Type == IndirectBranchType::UNKNOWN && !MemLocInstr)
588
610
return Type;
589
611
590
612
if (MemLocInstr != &Instruction)
591
613
IndexRegNum = 0 ;
592
614
615
+ if (BC.TheTriple ->getArch () == llvm::Triple::aarch64) {
616
+ const auto *Sym = BC.MIA ->getTargetSymbol (*PCRelBaseInstr, 1 );
617
+ assert (Sym && " Symbol extraction failed" );
618
+ auto SI = BC.GlobalSymbols .find (Sym->getName ());
619
+ if (SI != BC.GlobalSymbols .end ()) {
620
+ PCRelAddr = SI->second ;
621
+ } else {
622
+ for (auto &Elmt : Labels) {
623
+ if (Elmt.second == Sym) {
624
+ PCRelAddr = Elmt.first + getAddress ();
625
+ break ;
626
+ }
627
+ }
628
+ }
629
+ uint64_t InstrAddr = 0 ;
630
+ for (auto II = InstructionOffsets.rbegin (); II != InstructionOffsets.rend ();
631
+ ++II) {
632
+ if (&Instructions[II->second ] == PCRelBaseInstr) {
633
+ InstrAddr = II->first + getAddress ();
634
+ break ;
635
+ }
636
+ }
637
+ assert (InstrAddr != 0 && " instruction not found" );
638
+ // We do this to avoid spurious references to code locations outside this
639
+ // function (for example, if the indirect jump lives in the last basic
640
+ // block of the function, it will create a reference to the next function).
641
+ // This replaces a symbol reference with an immediate.
642
+ BC.MIA ->replaceMemOperandDisp (*PCRelBaseInstr,
643
+ MCOperand::createImm (PCRelAddr - InstrAddr));
644
+ // FIXME: Disable full jump table processing for AArch64 until we have a
645
+ // proper way of determining the jump table limits.
646
+ return IndirectBranchType::UNKNOWN;
647
+ }
648
+
593
649
// RIP-relative addressing should be converted to symbol form by now
594
650
// in processed instructions (but not in jump).
595
651
if (DispExpr) {
596
- auto SI = BC.GlobalSymbols .find (DispExpr->getSymbol ().getName ());
652
+ auto SI =
653
+ BC.GlobalSymbols .find (BC.MIA ->getTargetSymbol (DispExpr)->getName ());
597
654
assert (SI != BC.GlobalSymbols .end () && " global symbol needs a value" );
598
655
ArrayStart = SI->second ;
599
656
BaseRegNum = 0 ;
657
+ if (BC.TheTriple ->getArch () == llvm::Triple::aarch64) {
658
+ ArrayStart &= ~0xFFFULL ;
659
+ ArrayStart += DispValue & 0xFFFULL ;
660
+ }
600
661
} else {
601
662
ArrayStart = static_cast <uint64_t >(DispValue);
602
663
}
@@ -679,7 +740,9 @@ IndirectBranchType BinaryFunction::processIndirectBranch(MCInst &Instruction,
679
740
<< " is referencing address 0x"
680
741
<< Twine::utohexstr (Section.getAddress () + ValueOffset));
681
742
// Extract the value and increment the offset.
682
- if (Type == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
743
+ if (BC.TheTriple ->getArch () == llvm::Triple::aarch64) {
744
+ Value = PCRelAddr + DE.getSigned (&ValueOffset, EntrySize);
745
+ } else if (Type == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
683
746
Value = ArrayStart + DE.getSigned (&ValueOffset, 4 );
684
747
} else {
685
748
Value = DE.getAddress (&ValueOffset);
@@ -810,7 +873,8 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
810
873
if (!TargetSymbol && Section && Section->isText () &&
811
874
(BC.TheTriple ->getArch () != llvm::Triple::aarch64 ||
812
875
!BC.MIA ->isADRP (Instruction))) {
813
- if (containsAddress (TargetAddress)) {
876
+ if (containsAddress (TargetAddress, /* UseMaxSize=*/
877
+ BC.TheTriple ->getArch () == llvm::Triple::aarch64)) {
814
878
if (TargetAddress != getAddress ()) {
815
879
// The address could potentially escape. Mark it as another entry
816
880
// point into the function.
@@ -831,7 +895,7 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
831
895
Instruction,
832
896
MCSymbolRefExpr::create (
833
897
TargetSymbol, MCSymbolRefExpr::VK_None, *BC.Ctx ),
834
- *BC.Ctx )));
898
+ *BC.Ctx , 0 )));
835
899
return true ;
836
900
};
837
901
@@ -890,6 +954,7 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
890
954
}
891
955
892
956
// Check if there's a relocation associated with this instruction.
957
+ bool UsedReloc{false };
893
958
if (!Relocations.empty ()) {
894
959
auto RI = Relocations.lower_bound (Offset);
895
960
if (RI != Relocations.end () && RI->first < Offset + Size) {
@@ -900,15 +965,21 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
900
965
<< " for instruction at offset 0x"
901
966
<< Twine::utohexstr (Offset) << ' \n ' );
902
967
int64_t Value;
903
- const auto Result =
904
- BC. MIA -> replaceImmWithSymbol ( Instruction, Relocation.Symbol ,
905
- Relocation.Addend , Ctx. get (), Value );
968
+ const auto Result = BC. MIA -> replaceImmWithSymbol (
969
+ Instruction, Relocation.Symbol , Relocation. Addend , Ctx. get (), Value ,
970
+ Relocation.Type );
906
971
(void )Result;
907
972
assert (Result && " cannot replace immediate with relocation" );
973
+ // For AArch64, if we replaced an immediate with a symbol from a
974
+ // relocation, we mark it so we do not try to further process a
975
+ // pc-relative operand. All we need is the symbol.
976
+ if (BC.TheTriple ->getArch () == llvm::Triple::aarch64)
977
+ UsedReloc = true ;
908
978
909
979
// Make sure we replaced the correct immediate (instruction
910
980
// can have multiple immediate operands).
911
- assert (static_cast <uint64_t >(Value) == Relocation.Value &&
981
+ assert ((BC.TheTriple ->getArch () == llvm::Triple::aarch64 ||
982
+ static_cast <uint64_t >(Value) == Relocation.Value ) &&
912
983
" immediate value mismatch in function" );
913
984
}
914
985
}
@@ -1081,7 +1152,7 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
1081
1152
// Indirect call. We only need to fix it if the operand is RIP-relative
1082
1153
if (IsSimple && MIA->hasPCRelOperand (Instruction)) {
1083
1154
if (!handlePCRelOperand (Instruction, AbsoluteInstrAddr, Size)) {
1084
- errs () << " BOLT-ERROR: cannot handle RIP operand at 0x"
1155
+ errs () << " BOLT-ERROR: cannot handle PC-relative operand at 0x"
1085
1156
<< Twine::utohexstr (AbsoluteInstrAddr)
1086
1157
<< " . Skipping function " << *this << " .\n " ;
1087
1158
if (opts::Relocs)
@@ -1091,9 +1162,9 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
1091
1162
}
1092
1163
}
1093
1164
} else {
1094
- if (MIA->hasPCRelOperand (Instruction)) {
1165
+ if (MIA->hasPCRelOperand (Instruction) && !UsedReloc ) {
1095
1166
if (!handlePCRelOperand (Instruction, AbsoluteInstrAddr, Size)) {
1096
- errs () << " BOLT-ERROR: cannot handle RIP operand at 0x"
1167
+ errs () << " BOLT-ERROR: cannot handle PC-relative operand at 0x"
1097
1168
<< Twine::utohexstr (AbsoluteInstrAddr)
1098
1169
<< " . Skipping function " << *this << " .\n " ;
1099
1170
if (opts::Relocs)
@@ -1359,7 +1430,7 @@ bool BinaryFunction::buildCFG() {
1359
1430
// Ignore nops. We use nops to derive alignment of the next basic block.
1360
1431
// It will not always work, as some blocks are naturally aligned, but
1361
1432
// it's just part of heuristic for block alignment.
1362
- if (MIA->isNoop (Instr)) {
1433
+ if (MIA->isNoop (Instr) && !PreserveNops ) {
1363
1434
IsLastInstrNop = true ;
1364
1435
continue ;
1365
1436
}
@@ -2593,9 +2664,8 @@ void BinaryFunction::emitConstantIslands(MCStreamer &Streamer) {
2593
2664
outs () << " BOLT-INFO: emitting constant island for function " << *this
2594
2665
<< " \n " ;
2595
2666
2596
- auto IS = IslandSymbols.begin ();
2597
-
2598
2667
// We split the island into smaller blocks and output labels between them.
2668
+ auto IS = IslandSymbols.begin ();
2599
2669
for (auto DataIter = DataOffsets.begin (); DataIter != DataOffsets.end ();
2600
2670
++DataIter) {
2601
2671
uint64_t FunctionOffset = *DataIter;
@@ -2617,18 +2687,33 @@ void BinaryFunction::emitConstantIslands(MCStreamer &Streamer) {
2617
2687
if (FunctionOffset == EndOffset)
2618
2688
continue ; // Size is zero, nothing to emit
2619
2689
2620
- // Emit labels and data
2621
- while (IS != IslandSymbols.end () && IS->first < EndOffset) {
2622
- auto NextStop = IS->first ;
2690
+ // Emit labels, relocs and data
2691
+ auto RI = MoveRelocations.lower_bound (FunctionOffset);
2692
+ while ((IS != IslandSymbols.end () && IS->first < EndOffset) ||
2693
+ (RI != MoveRelocations.end () && RI->first < EndOffset)) {
2694
+ auto NextLabelOffset = IS == IslandSymbols.end () ? EndOffset : IS->first ;
2695
+ auto NextRelOffset = RI == MoveRelocations.end () ? EndOffset : RI->first ;
2696
+ auto NextStop = std::min (NextLabelOffset, NextRelOffset);
2623
2697
assert (NextStop <= EndOffset && " internal overflow error" );
2624
2698
if (FunctionOffset < NextStop) {
2625
2699
Streamer.EmitBytes (FunctionContents.slice (FunctionOffset, NextStop));
2626
2700
FunctionOffset = NextStop;
2627
2701
}
2628
- DEBUG (dbgs () << " BOLT-DEBUG: emitted label " << IS->second ->getName ()
2629
- << " at offset 0x" << Twine::utohexstr (IS->first ) << ' \n ' );
2630
- Streamer.EmitLabel (IS->second );
2631
- ++IS;
2702
+ if (IS != IslandSymbols.end () && FunctionOffset == IS->first ) {
2703
+ DEBUG (dbgs () << " BOLT-DEBUG: emitted label " << IS->second ->getName ()
2704
+ << " at offset 0x" << Twine::utohexstr (IS->first ) << ' \n ' );
2705
+ Streamer.EmitLabel (IS->second );
2706
+ ++IS;
2707
+ }
2708
+ if (RI != MoveRelocations.end () && FunctionOffset == RI->first ) {
2709
+ auto RelocationSize = RI->second .emit (&Streamer);
2710
+ DEBUG (dbgs () << " BOLT-DEBUG: emitted relocation for symbol "
2711
+ << RI->second .Symbol ->getName () << " at offset 0x"
2712
+ << Twine::utohexstr (RI->first )
2713
+ << " with size " << RelocationSize << ' \n ' );
2714
+ FunctionOffset += RelocationSize;
2715
+ ++RI;
2716
+ }
2632
2717
}
2633
2718
assert (FunctionOffset <= EndOffset && " overflow error" );
2634
2719
if (FunctionOffset < EndOffset) {
0 commit comments