Skip to content

Commit 843019a

Browse files
[SPIR-V] Correctly use halfword-aligned byte address (microsoft#5594)
All loads and stores for ByteAddressBuffer were being aligned to the nearest word. This is incorrect behavior for 16-bit types. In order to implement this, I had to move some of the logic for calculating offsets from compile time to runtime. For loads and stores of 32 or 64-bit types with a literal byte address, the optimizer will be able to reduce this down to nearly the same code as was being output before. For 16-bit types, stores are slightly more convoluted (we need to load the old value, shift and mask it, and then combine it with the new value, in order to avoid clobbering any existing value). The optimizer isn't currently able to optimize away unnecessary shifts and masks, but my intuition is that it should be possible, in case we want to optimize this more. Fixes microsoft#5475
1 parent 6a97408 commit 843019a

13 files changed

+1248
-817
lines changed

tools/clang/lib/SPIRV/RawBufferMethods.cpp

Lines changed: 233 additions & 334 deletions
Large diffs are not rendered by default.

tools/clang/lib/SPIRV/RawBufferMethods.h

Lines changed: 61 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -23,100 +23,95 @@ class RawBufferHandler {
2323
: theEmitter(emitter), astContext(emitter.getASTContext()),
2424
spvBuilder(emitter.getSpirvBuilder()) {}
2525

26-
/// \brief Performs (RW)ByteAddressBuffer.Load<T>(address).
26+
/// \brief Performs (RW)ByteAddressBuffer.Load<T>(byteAddress).
2727
/// (RW)ByteAddressBuffers are represented as structs with only one member
2828
/// which is a runtime array in SPIR-V. This method works by loading one or
2929
/// more uints, and performing necessary casts and composite constructions
30-
/// to build the 'targetType'. The 'offset' parameter can be used for finer
31-
/// grained load of bitwidths smaller than 32-bits. The layout rule for the
32-
/// result will be `Void` because the value will be built and used internally
33-
/// only. It does not have to match `buffer`.
30+
/// to build the 'targetType'. The layout rule for the result will be `Void`
31+
/// because the value will be built and used internally only. It does not have
32+
/// to match `buffer`.
3433
///
3534
/// Example:
36-
/// targetType = uint16_t, address=0, offset=0
37-
/// --> Load the first 16-bit uint starting at address 0.
38-
/// targetType = uint16_t, address=0, offset=16
39-
/// --> Load the second 16-bit uint starting at address 0.
40-
SpirvInstruction *processTemplatedLoadFromBuffer(SpirvInstruction *buffer,
41-
SpirvInstruction *&index,
42-
const QualType targetType,
43-
uint32_t &bitOffset,
44-
SourceRange range = {});
35+
/// targetType = uint16_t, byteAddress=0
36+
/// --> Load the first 16-bit uint starting at byte address 0.
37+
SpirvInstruction *processTemplatedLoadFromBuffer(
38+
SpirvInstruction *buffer, SpirvInstruction *byteAddress,
39+
const QualType targetType, SourceRange range = {});
4540

4641
/// \brief Performs RWByteAddressBuffer.Store<T>(address, value).
4742
/// RWByteAddressBuffers are represented in SPIR-V as structs with only one
4843
/// member which is a runtime array of uints. This method works by decomposing
4944
/// the given |value| to reach numeric/bool types. Then performs necessary
5045
/// casts to uints and stores them in the underlying runtime array.
51-
/// The |bitOffset| parameter can be used for finer-grained bit-offset
52-
/// control.
5346
///
5447
/// Example:
55-
/// targetType = uint16_t, address=0, offset=0
48+
/// targetType = uint16_t, address=0
5649
/// --> Store to the first 16-bit uint starting at address 0.
57-
/// targetType = uint16_t, address=0, offset=16
58-
/// --> Store to the second 16-bit uint starting at address 0.
5950
void processTemplatedStoreToBuffer(SpirvInstruction *value,
6051
SpirvInstruction *buffer,
61-
SpirvInstruction *&index,
52+
SpirvInstruction *&byteAddress,
6253
const QualType valueType,
63-
uint32_t &bitOffset,
6454
SourceRange range = {});
6555

6656
private:
67-
SpirvInstruction *load16BitsAtBitOffset0(SpirvInstruction *buffer,
68-
SpirvInstruction *&index,
69-
QualType target16BitType,
70-
uint32_t &bitOffset,
71-
SourceRange range = {});
72-
73-
SpirvInstruction *load32BitsAtBitOffset0(SpirvInstruction *buffer,
74-
SpirvInstruction *&index,
75-
QualType target32BitType,
76-
uint32_t &bitOffset,
77-
SourceRange range = {});
78-
79-
SpirvInstruction *load64BitsAtBitOffset0(SpirvInstruction *buffer,
80-
SpirvInstruction *&index,
81-
QualType target64BitType,
82-
uint32_t &bitOffset,
83-
SourceRange range = {});
84-
85-
SpirvInstruction *load16BitsAtBitOffset16(SpirvInstruction *buffer,
86-
SpirvInstruction *&index,
87-
QualType target16BitType,
88-
uint32_t &bitOffset,
89-
SourceRange range = {});
57+
class BufferAddress {
58+
public:
59+
BufferAddress(SpirvInstruction *&byteAddress, SpirvEmitter &emitter)
60+
: byteAddress(byteAddress), wordIndex(),
61+
spvBuilder(emitter.getSpirvBuilder()),
62+
astContext(emitter.getASTContext()) {}
63+
SpirvInstruction *getByteAddress();
64+
SpirvInstruction *getWordIndex(SourceLocation loc, SourceRange range);
65+
66+
void incrementByteAddress(SpirvInstruction *width, SourceLocation loc,
67+
SourceRange range);
68+
void incrementByteAddress(uint32_t width, SourceLocation loc,
69+
SourceRange range);
70+
71+
void incrementWordIndex(SourceLocation loc, SourceRange range);
72+
73+
private:
74+
SpirvInstruction *byteAddress;
75+
llvm::Optional<SpirvInstruction *> wordIndex;
76+
77+
SpirvBuilder &spvBuilder;
78+
ASTContext &astContext;
79+
};
9080

91-
private:
92-
void store16BitsAtBitOffset0(SpirvInstruction *value,
93-
SpirvInstruction *buffer,
94-
SpirvInstruction *&index,
95-
const QualType valueType,
81+
SpirvInstruction *processTemplatedLoadFromBuffer(SpirvInstruction *buffer,
82+
BufferAddress &address,
83+
const QualType targetType,
84+
SourceRange range = {});
85+
void processTemplatedStoreToBuffer(SpirvInstruction *value,
86+
SpirvInstruction *buffer,
87+
BufferAddress &address,
88+
const QualType valueType,
89+
SourceRange range = {});
90+
91+
SpirvInstruction *load16Bits(SpirvInstruction *buffer, BufferAddress &address,
92+
QualType target16BitType,
9693
SourceRange range = {});
9794

98-
void store32BitsAtBitOffset0(SpirvInstruction *value,
99-
SpirvInstruction *buffer,
100-
SpirvInstruction *&index,
101-
const QualType valueType,
95+
SpirvInstruction *load32Bits(SpirvInstruction *buffer, BufferAddress &address,
96+
QualType target32BitType,
10297
SourceRange range = {});
10398

104-
void store64BitsAtBitOffset0(SpirvInstruction *value,
105-
SpirvInstruction *buffer,
106-
SpirvInstruction *&index,
107-
const QualType valueType,
99+
SpirvInstruction *load64Bits(SpirvInstruction *buffer, BufferAddress &address,
100+
QualType target64BitType,
108101
SourceRange range = {});
109102

110-
void store16BitsAtBitOffset16(SpirvInstruction *value,
111-
SpirvInstruction *buffer,
112-
SpirvInstruction *&index,
113-
const QualType valueType,
114-
SourceRange range = {});
103+
private:
104+
void store16Bits(SpirvInstruction *value, SpirvInstruction *buffer,
105+
BufferAddress &address, const QualType valueType,
106+
SourceRange range = {});
107+
108+
void store32Bits(SpirvInstruction *value, SpirvInstruction *buffer,
109+
BufferAddress &address, const QualType valueType,
110+
SourceRange range = {});
115111

116-
void storeArrayOfScalars(std::deque<SpirvInstruction *> values,
117-
SpirvInstruction *buffer, SpirvInstruction *&index,
118-
const QualType valueType, uint32_t &bitOffset,
119-
SourceLocation, SourceRange range = {});
112+
void store64Bits(SpirvInstruction *value, SpirvInstruction *buffer,
113+
BufferAddress &address, const QualType valueType,
114+
SourceRange range = {});
120115

121116
/// \brief Serializes the given values into their components until a scalar or
122117
/// a struct has been reached. Returns the most basic type it reaches.

tools/clang/lib/SPIRV/SpirvEmitter.cpp

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4361,33 +4361,35 @@ SpirvInstruction *SpirvEmitter::processByteAddressBufferLoadStore(
43614361
BuiltinType::UInt)
43624362
: !expr->getType()->isSpecificBuiltinType(BuiltinType::UInt));
43634363

4364-
// Do a OpShiftRightLogical by 2 (divide by 4 to get aligned memory
4365-
// access). The AST always casts the address to unsinged integer, so shift
4366-
// by unsinged integer 2.
4367-
auto *constUint2 =
4368-
spvBuilder.getConstantInt(astContext.UnsignedIntTy, llvm::APInt(32, 2));
43694364
const auto range = expr->getSourceRange();
4370-
SpirvInstruction *address = spvBuilder.createBinaryOp(
4371-
spv::Op::OpShiftRightLogical, addressType, byteAddress, constUint2,
4372-
expr->getExprLoc(), range);
43734365

43744366
if (isTemplatedLoadOrStore) {
43754367
// Templated load. Need to (potentially) perform more
43764368
// loads/casts/composite-constructs.
4377-
uint32_t bitOffset = 0;
43784369
if (doStore) {
43794370
auto *values = doExpr(expr->getArg(1));
43804371
RawBufferHandler(*this).processTemplatedStoreToBuffer(
4381-
values, objectInfo, address, expr->getArg(1)->getType(), bitOffset,
4382-
range);
4372+
values, objectInfo, byteAddress, expr->getArg(1)->getType(), range);
43834373
return nullptr;
43844374
} else {
43854375
RawBufferHandler rawBufferHandler(*this);
43864376
return rawBufferHandler.processTemplatedLoadFromBuffer(
4387-
objectInfo, address, expr->getType(), bitOffset, range);
4377+
objectInfo, byteAddress, expr->getType(), range);
43884378
}
43894379
}
43904380

4381+
// Do a OpShiftRightLogical by 2 (divide by 4 to get aligned memory
4382+
// access). The AST always casts the address to unsigned integer, so shift
4383+
// by unsigned integer 2.
4384+
auto *constUint2 =
4385+
spvBuilder.getConstantInt(astContext.UnsignedIntTy, llvm::APInt(32, 2));
4386+
SpirvInstruction *address = spvBuilder.createBinaryOp(
4387+
spv::Op::OpShiftRightLogical, addressType, byteAddress, constUint2,
4388+
expr->getExprLoc(), range);
4389+
4390+
// We might be able to reduce duplication by handling this with
4391+
// processTemplatedLoadFromBuffer
4392+
43914393
// Perform access chain into the RWByteAddressBuffer.
43924394
// First index must be zero (member 0 of the struct is a
43934395
// runtimeArray). The second index passed to OpAccessChain should be

0 commit comments

Comments
 (0)