Skip to content

Commit 3a4b351

Browse files
authored
[IR] Introduce the ptrtoaddr instruction
This introduces a new `ptrtoaddr` instruction which is similar to `ptrtoint` but has two differences:

1) Unlike `ptrtoint`, `ptrtoaddr` does not capture provenance
2) `ptrtoaddr` only extracts (and then extends/truncates) the low index-width bits of the pointer

For most architectures, difference 2) does not matter since index (address) width and pointer representation width are the same, but this does make a difference for architectures that have pointers that aren't just plain integer addresses such as AMDGPU fat pointers or CHERI capabilities.

This commit introduces textual and bitcode IR support as well as basic code generation, but optimization passes do not handle the new instruction yet so it may result in worse code than using ptrtoint. Follow-up changes will update capture tracking, etc. for the new instruction.

RFC: https://discourse.llvm.org/t/clarifiying-the-semantics-of-ptrtoint/83987/54

Reviewed By: nikic

Pull Request: llvm/llvm-project#139357
1 parent 90e8c8e commit 3a4b351

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+830
-162
lines changed

llvm/docs/LangRef.rst

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5175,6 +5175,8 @@ The following is the syntax for constant expressions:
51755175
Perform the :ref:`trunc operation <i_trunc>` on constants.
51765176
``ptrtoint (CST to TYPE)``
51775177
Perform the :ref:`ptrtoint operation <i_ptrtoint>` on constants.
5178+
``ptrtoaddr (CST to TYPE)``
5179+
Perform the :ref:`ptrtoaddr operation <i_ptrtoaddr>` on constants.
51785180
``inttoptr (CST to TYPE)``
51795181
Perform the :ref:`inttoptr operation <i_inttoptr>` on constants.
51805182
This one is *really* dangerous!
@@ -12523,6 +12525,58 @@ Example:
1252312525
%Y = ptrtoint ptr %P to i64 ; yields zero extension on 32-bit architecture
1252412526
%Z = ptrtoint <4 x ptr> %P to <4 x i64>; yields vector zero extension for a vector of addresses on 32-bit architecture
1252512527

12528+
.. _i_ptrtoaddr:
12529+
12530+
'``ptrtoaddr .. to``' Instruction
12531+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
12532+
12533+
Syntax:
12534+
"""""""
12535+
12536+
::
12537+
12538+
<result> = ptrtoaddr <ty> <value> to <ty2> ; yields ty2
12539+
12540+
Overview:
12541+
"""""""""
12542+
12543+
The '``ptrtoaddr``' instruction converts the pointer or a vector of
12544+
pointers ``value`` to the underlying integer address (or vector of addresses) of
12545+
type ``ty2``. This is different from :ref:`ptrtoint <i_ptrtoint>` in that it
12546+
only operates on the index bits of the pointer and ignores all other bits, and
12547+
does not capture the provenance of the pointer.
12548+
12549+
Arguments:
12550+
""""""""""
12551+
12552+
The '``ptrtoaddr``' instruction takes a ``value`` to cast, which must be
12553+
a value of type :ref:`pointer <t_pointer>` or a vector of pointers, and a
12554+
type to cast it to ``ty2``, which must be the :ref:`integer <t_integer>`
12555+
type (or vector of integers) matching the pointer index width of the address
12556+
space of ``ty``.
12557+
12558+
Semantics:
12559+
""""""""""
12560+
12561+
The '``ptrtoaddr``' instruction converts ``value`` to integer type ``ty2`` by
12562+
interpreting the lowest index-width pointer representation bits as an integer.
12563+
If the address size and the pointer representation size are the same and
12564+
``value`` and ``ty2`` are the same size, then nothing is done (*no-op cast*)
12565+
other than a type change.
12566+
12567+
The ``ptrtoaddr`` instruction always :ref:`captures the address but not the provenance <pointercapture>`
12568+
of the pointer argument.
12569+
12570+
Example:
12571+
""""""""
12572+
This example assumes pointers in address space 1 are 64 bits in size with an
12573+
address width of 32 bits (``p1:64:64:64:32`` :ref:`datalayout string<langref_datalayout>`):
12574+
.. code-block:: llvm
12575+
12576+
%X = ptrtoaddr ptr addrspace(1) %P to i32 ; extracts low 32 bits of pointer
12577+
%Y = ptrtoaddr <4 x ptr addrspace(1)> %P to <4 x i32>; yields vector of low 32 bits for each pointer
12578+
12579+
1252612580
.. _i_inttoptr:
1252712581

1252812582
'``inttoptr .. to``' Instruction

llvm/docs/ReleaseNotes.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,10 @@ Makes programs 10x faster by doing Special New Thing.
5656
Changes to the LLVM IR
5757
----------------------
5858

59+
* The `ptrtoaddr` instruction was introduced. This instruction returns the
60+
address component of a pointer type variable but unlike `ptrtoint` does not
61+
capture provenance ([#125687](https://github.com/llvm/llvm-project/pull/125687)).
62+
5963
Changes to LLVM infrastructure
6064
------------------------------
6165

llvm/include/llvm-c/Core.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ typedef enum {
111111
LLVMFPTrunc = 37,
112112
LLVMFPExt = 38,
113113
LLVMPtrToInt = 39,
114+
LLVMPtrToAddr = 69,
114115
LLVMIntToPtr = 40,
115116
LLVMBitCast = 41,
116117
LLVMAddrSpaceCast = 60,

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -731,6 +731,13 @@ class TargetTransformInfoImplBase {
731731
return 0;
732732
break;
733733
}
734+
case Instruction::PtrToAddr: {
735+
unsigned DstSize = Dst->getScalarSizeInBits();
736+
assert(DstSize == DL.getAddressSizeInBits(Src));
737+
if (DL.isLegalInteger(DstSize))
738+
return 0;
739+
break;
740+
}
734741
case Instruction::PtrToInt: {
735742
unsigned DstSize = Dst->getScalarSizeInBits();
736743
if (DL.isLegalInteger(DstSize) &&
@@ -1436,6 +1443,7 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
14361443
Op2Info, Operands, I);
14371444
}
14381445
case Instruction::IntToPtr:
1446+
case Instruction::PtrToAddr:
14391447
case Instruction::PtrToInt:
14401448
case Instruction::SIToFP:
14411449
case Instruction::UIToFP:

llvm/include/llvm/AsmParser/LLToken.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,7 @@ enum Kind {
319319
kw_fptoui,
320320
kw_fptosi,
321321
kw_inttoptr,
322+
kw_ptrtoaddr,
322323
kw_ptrtoint,
323324
kw_bitcast,
324325
kw_addrspacecast,

llvm/include/llvm/Bitcode/LLVMBitCodes.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -456,7 +456,8 @@ enum CastOpcodes {
456456
CAST_PTRTOINT = 9,
457457
CAST_INTTOPTR = 10,
458458
CAST_BITCAST = 11,
459-
CAST_ADDRSPACECAST = 12
459+
CAST_ADDRSPACECAST = 12,
460+
CAST_PTRTOADDR = 13,
460461
};
461462

462463
/// UnaryOpcodes - These are values used in the bitcode files to encode which

llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,10 @@ class IRTranslator : public MachineFunctionPass {
486486
bool translatePtrToInt(const User &U, MachineIRBuilder &MIRBuilder) {
487487
return translateCast(TargetOpcode::G_PTRTOINT, U, MIRBuilder);
488488
}
489+
bool translatePtrToAddr(const User &U, MachineIRBuilder &MIRBuilder) {
490+
// FIXME: this is not correct for pointers with addr width != pointer width
491+
return translatePtrToInt(U, MIRBuilder);
492+
}
489493
bool translateTrunc(const User &U, MachineIRBuilder &MIRBuilder) {
490494
return translateCast(TargetOpcode::G_TRUNC, U, MIRBuilder);
491495
}

llvm/include/llvm/IR/Constants.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1158,6 +1158,8 @@ class ConstantExpr : public Constant {
11581158
LLVM_ABI static Constant *getXor(Constant *C1, Constant *C2);
11591159
LLVM_ABI static Constant *getTrunc(Constant *C, Type *Ty,
11601160
bool OnlyIfReduced = false);
1161+
LLVM_ABI static Constant *getPtrToAddr(Constant *C, Type *Ty,
1162+
bool OnlyIfReduced = false);
11611163
LLVM_ABI static Constant *getPtrToInt(Constant *C, Type *Ty,
11621164
bool OnlyIfReduced = false);
11631165
LLVM_ABI static Constant *getIntToPtr(Constant *C, Type *Ty,

llvm/include/llvm/IR/IRBuilder.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2187,7 +2187,10 @@ class IRBuilderBase {
21872187
return CreateCast(Instruction::FPExt, V, DestTy, Name, FPMathTag,
21882188
FMFSource);
21892189
}
2190-
2190+
Value *CreatePtrToAddr(Value *V, const Twine &Name = "") {
2191+
return CreateCast(Instruction::PtrToInt, V,
2192+
BB->getDataLayout().getAddressType(V->getType()), Name);
2193+
}
21912194
Value *CreatePtrToInt(Value *V, Type *DestTy,
21922195
const Twine &Name = "") {
21932196
return CreateCast(Instruction::PtrToInt, V, DestTy, Name);

llvm/include/llvm/IR/InstVisitor.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@ class InstVisitor {
183183
RetTy visitUIToFPInst(UIToFPInst &I) { DELEGATE(CastInst);}
184184
RetTy visitSIToFPInst(SIToFPInst &I) { DELEGATE(CastInst);}
185185
RetTy visitPtrToIntInst(PtrToIntInst &I) { DELEGATE(CastInst);}
186+
RetTy visitPtrToAddrInst(PtrToAddrInst &I) { DELEGATE(CastInst);}
186187
RetTy visitIntToPtrInst(IntToPtrInst &I) { DELEGATE(CastInst);}
187188
RetTy visitBitCastInst(BitCastInst &I) { DELEGATE(CastInst);}
188189
RetTy visitAddrSpaceCastInst(AddrSpaceCastInst &I) { DELEGATE(CastInst);}

0 commit comments

Comments
 (0)