6666#include " llvm/Support/Debug.h"
6767#include " llvm/Support/Error.h"
6868#include " llvm/Support/ErrorHandling.h"
69+ #include " llvm/Support/MathExtras.h"
6970#include " llvm/Support/Process.h"
70- #include " llvm/Support/raw_ostream.h"
7171#include " llvm/Support/Threading.h"
72+ #include " llvm/Support/raw_ostream.h"
7273#include " llvm/Target/TargetMachine.h"
7374#include " llvm/TargetParser/Triple.h"
7475#include " llvm/Transforms/Utils/ModuleUtils.h"
@@ -155,6 +156,11 @@ class PPCAsmPrinter : public AsmPrinter {
155156 TOC;
156157 const PPCSubtarget *Subtarget = nullptr ;
157158
159+ // Keep track of the number of TLS variables and their corresponding
160+ // addresses, which is then used for the assembly printing of
161+ // non-TOC-based local-exec variables.
162+ MapVector<const GlobalValue *, uint64_t > TLSVarsToAddressMapping;
163+
158164public:
159165 explicit PPCAsmPrinter (TargetMachine &TM,
160166 std::unique_ptr<MCStreamer> Streamer)
@@ -199,6 +205,8 @@ class PPCAsmPrinter : public AsmPrinter {
199205 void LowerPATCHPOINT (StackMaps &SM, const MachineInstr &MI);
200206 void EmitTlsCall (const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
201207 void EmitAIXTlsCallHelper (const MachineInstr *MI);
208+ const MCExpr *getAdjustedLocalExecExpr (const MachineOperand &MO,
209+ int64_t Offset);
202210 bool runOnMachineFunction (MachineFunction &MF) override {
203211 Subtarget = &MF.getSubtarget <PPCSubtarget>();
204212 bool Changed = AsmPrinter::runOnMachineFunction (MF);
@@ -753,6 +761,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
753761 MCInst TmpInst;
754762 const bool IsPPC64 = Subtarget->isPPC64 ();
755763 const bool IsAIX = Subtarget->isAIXABI ();
764+ const bool HasAIXSmallLocalExecTLS = Subtarget->hasAIXSmallLocalExecTLS ();
756765 const Module *M = MF->getFunction ().getParent ();
757766 PICLevel::Level PL = M->getPICLevel ();
758767
@@ -1504,12 +1513,70 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
15041513 // Verify alignment is legal, so we don't create relocations
15051514 // that can't be supported.
15061515 unsigned OpNum = (MI->getOpcode () == PPC::STD) ? 2 : 1 ;
1516+ // For non-TOC-based local-exec TLS accesses with non-zero offsets, the
1517+ // machine operand (which is a TargetGlobalTLSAddress) is expected to be
1518+ // the same operand for both loads and stores.
1519+ for (const MachineOperand &TempMO : MI->operands ()) {
1520+ if (((TempMO.getTargetFlags () == PPCII::MO_TPREL_FLAG)) &&
1521+ TempMO.getOperandNo () == 1 )
1522+ OpNum = 1 ;
1523+ }
15071524 const MachineOperand &MO = MI->getOperand (OpNum);
15081525 if (MO.isGlobal ()) {
15091526 const DataLayout &DL = MO.getGlobal ()->getParent ()->getDataLayout ();
15101527 if (MO.getGlobal ()->getPointerAlignment (DL) < 4 )
15111528 llvm_unreachable (" Global must be word-aligned for LD, STD, LWA!" );
15121529 }
1530+ // As these load/stores share common code with the following load/stores,
1531+ // fall through to the subsequent cases in order to either process the
1532+ // non-TOC-based local-exec sequence or to process the instruction normally.
1533+ [[fallthrough]];
1534+ }
1535+ case PPC::LBZ:
1536+ case PPC::LBZ8:
1537+ case PPC::LHA:
1538+ case PPC::LHA8:
1539+ case PPC::LHZ:
1540+ case PPC::LHZ8:
1541+ case PPC::LWZ:
1542+ case PPC::LWZ8:
1543+ case PPC::STB:
1544+ case PPC::STB8:
1545+ case PPC::STH:
1546+ case PPC::STH8:
1547+ case PPC::STW:
1548+ case PPC::STW8:
1549+ case PPC::LFS:
1550+ case PPC::STFS:
1551+ case PPC::LFD:
1552+ case PPC::STFD:
1553+ case PPC::ADDI8: {
1554+ // A faster non-TOC-based local-exec sequence is represented by `addi`
1555+ // or a load/store instruction (that directly loads or stores off of the
1556+ // thread pointer) with an immediate operand having the MO_TPREL_FLAG.
1557+ // Such instructions do not otherwise arise.
1558+ if (!HasAIXSmallLocalExecTLS)
1559+ break ;
1560+ bool IsMIADDI8 = MI->getOpcode () == PPC::ADDI8;
1561+ unsigned OpNum = IsMIADDI8 ? 2 : 1 ;
1562+ const MachineOperand &MO = MI->getOperand (OpNum);
1563+ unsigned Flag = MO.getTargetFlags ();
1564+ if (Flag == PPCII::MO_TPREL_FLAG ||
1565+ Flag == PPCII::MO_GOT_TPREL_PCREL_FLAG ||
1566+ Flag == PPCII::MO_TPREL_PCREL_FLAG) {
1567+ LowerPPCMachineInstrToMCInst (MI, TmpInst, *this );
1568+
1569+ const MCExpr *Expr = getAdjustedLocalExecExpr (MO, MO.getOffset ());
1570+ if (Expr)
1571+ TmpInst.getOperand (OpNum) = MCOperand::createExpr (Expr);
1572+
1573+ // Change the opcode to load address if the original opcode is an `addi`.
1574+ if (IsMIADDI8)
1575+ TmpInst.setOpcode (PPC::LA8);
1576+
1577+ EmitToStreamer (*OutStreamer, TmpInst);
1578+ return ;
1579+ }
15131580 // Now process the instruction normally.
15141581 break ;
15151582 }
@@ -1523,30 +1590,73 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
15231590 EmitToStreamer (*OutStreamer, MCInstBuilder (PPC::EnforceIEIO));
15241591 return ;
15251592 }
1526- case PPC::ADDI8: {
1527- // The faster non-TOC-based local-exec sequence is represented by `addi`
1528- // with an immediate operand having the MO_TPREL_FLAG. Such an instruction
1529- // does not otherwise arise.
1530- unsigned Flag = MI->getOperand (2 ).getTargetFlags ();
1531- if (Flag == PPCII::MO_TPREL_FLAG ||
1532- Flag == PPCII::MO_GOT_TPREL_PCREL_FLAG ||
1533- Flag == PPCII::MO_TPREL_PCREL_FLAG) {
1534- assert (
1535- Subtarget->hasAIXSmallLocalExecTLS () &&
1536- " addi with thread-pointer only expected with local-exec small TLS" );
1537- LowerPPCMachineInstrToMCInst (MI, TmpInst, *this );
1538- TmpInst.setOpcode (PPC::LA8);
1539- EmitToStreamer (*OutStreamer, TmpInst);
1540- return ;
1541- }
1542- break ;
1543- }
15441593 }
15451594
15461595 LowerPPCMachineInstrToMCInst (MI, TmpInst, *this );
15471596 EmitToStreamer (*OutStreamer, TmpInst);
15481597}
15491598
1599+ // For non-TOC-based local-exec variables that have a non-zero offset,
1600+ // we need to create a new MCExpr that adds the non-zero offset to the address
1601+ // of the local-exec variable that will be used in either an addi, load or
1602+ // store. However, the final displacement for these instructions must be
1603+ // between [-32768, 32768), so if the TLS address + its non-zero offset is
1604+ // greater than 32KB, a new MCExpr is produced to accommodate this situation.
1605+ const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr (const MachineOperand &MO,
1606+ int64_t Offset) {
1607+ // Non-zero offsets (for loads, stores or `addi`) require additional handling.
1608+ // When the offset is zero, there is no need to create an adjusted MCExpr.
1609+ if (!Offset)
1610+ return nullptr ;
1611+
1612+ assert (MO.isGlobal () && " Only expecting a global MachineOperand here!" );
1613+ const GlobalValue *GValue = MO.getGlobal ();
1614+ assert (TM.getTLSModel (GValue) == TLSModel::LocalExec &&
1615+ " Only local-exec accesses are handled!" );
1616+
1617+ bool IsGlobalADeclaration = GValue->isDeclarationForLinker ();
1618+ // Find the GlobalVariable that corresponds to the particular TLS variable
1619+ // in the TLS variable-to-address mapping. All TLS variables should exist
1620+ // within this map, with the exception of TLS variables marked as extern.
1621+ const auto TLSVarsMapEntryIter = TLSVarsToAddressMapping.find (GValue);
1622+ if (TLSVarsMapEntryIter == TLSVarsToAddressMapping.end ())
1623+ assert (IsGlobalADeclaration &&
1624+ " Only expecting to find extern TLS variables not present in the TLS "
1625+ " variable-to-address map!" );
1626+
1627+ unsigned TLSVarAddress =
1628+ IsGlobalADeclaration ? 0 : TLSVarsMapEntryIter->second ;
1629+ ptrdiff_t FinalAddress = (TLSVarAddress + Offset);
1630+ // If the address of the TLS variable + the offset is less than 32KB,
1631+ // or if the TLS variable is extern, we simply produce an MCExpr to add the
1632+ // non-zero offset to the TLS variable address.
1633+ // For when TLS variables are extern, this is safe to do because we can
1634+ // assume that the address of extern TLS variables are zero.
1635+ const MCExpr *Expr = MCSymbolRefExpr::create (
1636+ getSymbol (GValue), MCSymbolRefExpr::VK_PPC_AIX_TLSLE, OutContext);
1637+ Expr = MCBinaryExpr::createAdd (
1638+ Expr, MCConstantExpr::create (Offset, OutContext), OutContext);
1639+ if (FinalAddress >= 32768 ) {
1640+ // Handle the written offset for cases where:
1641+ // TLS variable address + Offset > 32KB.
1642+
1643+ // The assembly that is printed will look like:
1644+ // TLSVar@le + Offset - Delta
1645+ // where Delta is a multiple of 64KB: ((FinalAddress + 32768) & ~0xFFFF).
1646+ ptrdiff_t Delta = ((FinalAddress + 32768 ) & ~0xFFFF );
1647+ // Check that the total instruction displacement fits within [-32768,32768).
1648+ ptrdiff_t InstDisp = TLSVarAddress + Offset - Delta;
1649+ assert ((InstDisp < 32768 ) ||
1650+ (InstDisp >= -32768 ) &&
1651+ " Expecting the instruction displacement for local-exec TLS "
1652+ " variables to be between [-32768, 32768)!" );
1653+ Expr = MCBinaryExpr::createAdd (
1654+ Expr, MCConstantExpr::create (-Delta, OutContext), OutContext);
1655+ }
1656+
1657+ return Expr;
1658+ }
1659+
15501660void PPCLinuxAsmPrinter::emitGNUAttributes (Module &M) {
15511661 // Emit float ABI into GNU attribute
15521662 Metadata *MD = M.getModuleFlag (" float-abi" );
@@ -2772,6 +2882,19 @@ bool PPCAIXAsmPrinter::doInitialization(Module &M) {
27722882 Csect->ensureMinAlignment (GOAlign);
27732883 };
27742884
2885+ // For all TLS variables, calculate their corresponding addresses and store
2886+ // them into TLSVarsToAddressMapping, which will be used to determine whether
2887+ // or not local-exec TLS variables require special assembly printing.
2888+ uint64_t TLSVarAddress = 0 ;
2889+ auto DL = M.getDataLayout ();
2890+ for (const auto &G : M.globals ()) {
2891+ if (G.isThreadLocal () && !G.isDeclaration ()) {
2892+ TLSVarAddress = alignTo (TLSVarAddress, getGVAlignment (&G, DL));
2893+ TLSVarsToAddressMapping[&G] = TLSVarAddress;
2894+ TLSVarAddress += DL.getTypeAllocSize (G.getValueType ());
2895+ }
2896+ }
2897+
27752898 // We need to know, up front, the alignment of csects for the assembly path,
27762899 // because once a .csect directive gets emitted, we could not change the
27772900 // alignment value on it.
0 commit comments