@@ -795,10 +795,21 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
795
795
}
796
796
}
797
797
798
+ if (BC.TheTriple ->getArch () == llvm::Triple::aarch64 &&
799
+ isInConstantIsland (TargetAddress)) {
800
+ TargetSymbol = BC.getOrCreateGlobalSymbol (TargetAddress, " ISLANDat" );
801
+ IslandSymbols[TargetAddress - getAddress ()] = TargetSymbol;
802
+ }
803
+
798
804
// Note that the address does not necessarily have to reside inside
799
805
// a section, it could be an absolute address too.
800
806
auto Section = BC.getSectionForAddress (TargetAddress);
801
- if (Section && Section->isText ()) {
807
+ // Assume AArch64's ADRP never references code - it does, but this is fixed
808
+ // after reading relocations. ADRP contents now are not really meaningful
809
+ // without its supporting relocation.
810
+ if (!TargetSymbol && Section && Section->isText () &&
811
+ (BC.TheTriple ->getArch () != llvm::Triple::aarch64 ||
812
+ !BC.MIA ->isADRP (Instruction))) {
802
813
if (containsAddress (TargetAddress)) {
803
814
if (TargetAddress != getAddress ()) {
804
815
// The address could potentially escape. Mark it as another entry
@@ -829,6 +840,16 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
829
840
MCInst Instruction;
830
841
const uint64_t AbsoluteInstrAddr = getAddress () + Offset;
831
842
843
+ // Check for data inside code and ignore it
844
+ if (DataOffsets.find (Offset) != DataOffsets.end ()) {
845
+ auto Iter = CodeOffsets.upper_bound (Offset);
846
+ if (Iter != CodeOffsets.end ()) {
847
+ Size = *Iter - Offset;
848
+ continue ;
849
+ }
850
+ break ;
851
+ }
852
+
832
853
if (!BC.DisAsm ->getInstruction (Instruction,
833
854
Size,
834
855
FunctionData.slice (Offset),
@@ -985,10 +1006,16 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
985
1006
// code without re-assembly.
986
1007
size_t RelSize = (Size < 5 ) ? 1 : 4 ;
987
1008
auto RelOffset = Offset + Size - RelSize;
1009
+ if (BC.TheTriple ->getArch () == llvm::Triple::aarch64) {
1010
+ RelSize = 0 ;
1011
+ RelOffset = Offset;
1012
+ }
988
1013
auto RI = MoveRelocations.find (RelOffset);
989
1014
if (RI == MoveRelocations.end ()) {
990
1015
uint64_t RelType = (RelSize == 1 ) ? ELF::R_X86_64_PC8
991
1016
: ELF::R_X86_64_PC32;
1017
+ if (BC.TheTriple ->getArch () == llvm::Triple::aarch64)
1018
+ RelType = ELF::R_AARCH64_CALL26;
992
1019
DEBUG (dbgs () << " BOLT-DEBUG: creating relocation for static"
993
1020
<< " function call to " << TargetSymbol->getName ()
994
1021
<< " at offset 0x"
@@ -2485,6 +2512,9 @@ void BinaryFunction::emitBody(MCStreamer &Streamer, bool EmitColdPart) {
2485
2512
LastIsPrefix = BC.MIA ->isPrefix (Instr);
2486
2513
}
2487
2514
}
2515
+
2516
+ if (!EmitColdPart)
2517
+ emitConstantIslands (Streamer);
2488
2518
}
2489
2519
2490
2520
void BinaryFunction::emitBodyRaw (MCStreamer *Streamer) {
@@ -2545,6 +2575,70 @@ void BinaryFunction::emitBodyRaw(MCStreamer *Streamer) {
2545
2575
}
2546
2576
}
2547
2577
2578
+ void BinaryFunction::emitConstantIslands (MCStreamer &Streamer) {
2579
+ if (DataOffsets.empty ())
2580
+ return ;
2581
+
2582
+ Streamer.EmitLabel (getFunctionConstantIslandLabel ());
2583
+ // Raw contents of the function.
2584
+ StringRef SectionContents;
2585
+ Section.getContents (SectionContents);
2586
+
2587
+ // Raw contents of the function.
2588
+ StringRef FunctionContents =
2589
+ SectionContents.substr (getAddress () - Section.getAddress (),
2590
+ getMaxSize ());
2591
+
2592
+ if (opts::Verbosity)
2593
+ outs () << " BOLT-INFO: emitting constant island for function " << *this
2594
+ << " \n " ;
2595
+
2596
+ auto IS = IslandSymbols.begin ();
2597
+
2598
+ // We split the island into smaller blocks and output labels between them.
2599
+ for (auto DataIter = DataOffsets.begin (); DataIter != DataOffsets.end ();
2600
+ ++DataIter) {
2601
+ uint64_t FunctionOffset = *DataIter;
2602
+ uint64_t EndOffset = 0ULL ;
2603
+
2604
+ // Determine size of this data chunk
2605
+ auto NextData = std::next (DataIter);
2606
+ auto CodeIter = CodeOffsets.lower_bound (*DataIter);
2607
+ if (CodeIter == CodeOffsets.end () && NextData == DataOffsets.end ()) {
2608
+ EndOffset = getMaxSize ();
2609
+ } else if (CodeIter == CodeOffsets.end ()) {
2610
+ EndOffset = *NextData;
2611
+ } else if (NextData == DataOffsets.end ()) {
2612
+ EndOffset = *CodeIter;
2613
+ } else {
2614
+ EndOffset = (*CodeIter > *NextData) ? *NextData : *CodeIter;
2615
+ }
2616
+
2617
+ if (FunctionOffset == EndOffset)
2618
+ continue ; // Size is zero, nothing to emit
2619
+
2620
+ // Emit labels and data
2621
+ while (IS != IslandSymbols.end () && IS->first < EndOffset) {
2622
+ auto NextStop = IS->first ;
2623
+ assert (NextStop <= EndOffset && " internal overflow error" );
2624
+ if (FunctionOffset < NextStop) {
2625
+ Streamer.EmitBytes (FunctionContents.slice (FunctionOffset, NextStop));
2626
+ FunctionOffset = NextStop;
2627
+ }
2628
+ DEBUG (dbgs () << " BOLT-DEBUG: emitted label " << IS->second ->getName ()
2629
+ << " at offset 0x" << Twine::utohexstr (IS->first ) << ' \n ' );
2630
+ Streamer.EmitLabel (IS->second );
2631
+ ++IS;
2632
+ }
2633
+ assert (FunctionOffset <= EndOffset && " overflow error" );
2634
+ if (FunctionOffset < EndOffset) {
2635
+ Streamer.EmitBytes (FunctionContents.slice (FunctionOffset, EndOffset));
2636
+ }
2637
+ }
2638
+
2639
+ assert (IS == IslandSymbols.end () && " some symbols were not emitted!" );
2640
+ }
2641
+
2548
2642
namespace {
2549
2643
2550
2644
#ifndef MAX_PATH
@@ -3334,10 +3428,37 @@ BinaryBasicBlock *BinaryFunction::splitEdge(BinaryBasicBlock *From,
3334
3428
return NewBBPtr;
3335
3429
}
3336
3430
3431
+ bool BinaryFunction::isDataMarker (const SymbolRef &Symbol,
3432
+ uint64_t SymbolSize) const {
3433
+ // For aarch64, the ABI defines mapping symbols so we identify data in the
3434
+ // code section (see IHI0056B). $d identifies a symbol starting data contents.
3435
+ if (BC.TheTriple ->getArch () == llvm::Triple::aarch64 &&
3436
+ Symbol.getType () == SymbolRef::ST_Unknown &&
3437
+ SymbolSize == 0 &&
3438
+ (!Symbol.getName ().getError () && *Symbol.getName () == " $d" ))
3439
+ return true ;
3440
+ return false ;
3441
+ }
3442
+
3443
+ bool BinaryFunction::isCodeMarker (const SymbolRef &Symbol,
3444
+ uint64_t SymbolSize) const {
3445
+ // For aarch64, the ABI defines mapping symbols so we identify data in the
3446
+ // code section (see IHI0056B). $x identifies a symbol starting code or the
3447
+ // end of a data chunk inside code.
3448
+ if (BC.TheTriple ->getArch () == llvm::Triple::aarch64 &&
3449
+ Symbol.getType () == SymbolRef::ST_Unknown &&
3450
+ SymbolSize == 0 &&
3451
+ (!Symbol.getName ().getError () && *Symbol.getName () == " $x" ))
3452
+ return true ;
3453
+ return false ;
3454
+ }
3455
+
3337
3456
bool BinaryFunction::isSymbolValidInScope (const SymbolRef &Symbol,
3338
3457
uint64_t SymbolSize) const {
3339
3458
// Some symbols are tolerated inside function bodies, others are not.
3340
3459
// The real function boundaries may not be known at this point.
3460
+ if (isDataMarker (Symbol, SymbolSize) || isCodeMarker (Symbol, SymbolSize))
3461
+ return true ;
3341
3462
3342
3463
// It's okay to have a zero-sized symbol in the middle of non-zero-sized
3343
3464
// function.
0 commit comments