Skip to content

Commit 3225b82

Browse files
authored
Merge pull request #1168 from Seanst98/sean/unique-addr-mode-per-dim-adapters
[Bindless][CUDA] Unique addressing modes per dimension
2 parents edb281f + 5fc4109 commit 3225b82

File tree

3 files changed

+50
-27
lines changed

3 files changed

+50
-27
lines changed

source/adapters/cuda/image.cpp

Lines changed: 32 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -239,29 +239,38 @@ ur_result_t urTextureCreate(ur_sampler_handle_t hSampler,
239239

240240
try {
241241
/// pi_sampler_properties
242+
/// Layout of UR samplers for CUDA
243+
///
244+
/// Sampler property layout:
242245
/// | <bits> | <usage>
243246
/// -----------------------------------
244-
/// | 31 30 ... 6 | N/A
245-
/// | 5 | mip filter mode
246-
/// | 4 3 2 | addressing mode
247+
/// | 31 30 ... 12 | N/A
248+
/// | 11 | mip filter mode
249+
/// | 10 9 8 | addressing mode 3
250+
/// | 7 6 5 | addressing mode 2
251+
/// | 4 3 2 | addressing mode 1
247252
/// | 1 | filter mode
248253
/// | 0 | normalize coords
249254
CUDA_TEXTURE_DESC ImageTexDesc = {};
250-
CUaddress_mode AddrMode = {};
251-
ur_sampler_addressing_mode_t AddrModeProp = hSampler->getAddressingMode();
252-
if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE -
253-
UR_SAMPLER_ADDRESSING_MODE_NONE)) {
254-
AddrMode = CU_TR_ADDRESS_MODE_CLAMP;
255-
} else if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_CLAMP -
256-
UR_SAMPLER_ADDRESSING_MODE_NONE)) {
257-
AddrMode = CU_TR_ADDRESS_MODE_BORDER;
258-
} else if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_REPEAT -
259-
UR_SAMPLER_ADDRESSING_MODE_NONE)) {
260-
AddrMode = CU_TR_ADDRESS_MODE_WRAP;
261-
} else if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT -
262-
UR_SAMPLER_ADDRESSING_MODE_NONE)) {
263-
AddrMode = CU_TR_ADDRESS_MODE_MIRROR;
255+
CUaddress_mode AddrMode[3];
256+
for (size_t i = 0; i < 3; i++) {
257+
ur_sampler_addressing_mode_t AddrModeProp =
258+
hSampler->getAddressingModeDim(i);
259+
if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE -
260+
UR_SAMPLER_ADDRESSING_MODE_NONE)) {
261+
AddrMode[i] = CU_TR_ADDRESS_MODE_CLAMP;
262+
} else if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_CLAMP -
263+
UR_SAMPLER_ADDRESSING_MODE_NONE)) {
264+
AddrMode[i] = CU_TR_ADDRESS_MODE_BORDER;
265+
} else if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_REPEAT -
266+
UR_SAMPLER_ADDRESSING_MODE_NONE)) {
267+
AddrMode[i] = CU_TR_ADDRESS_MODE_WRAP;
268+
} else if (AddrModeProp == (UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT -
269+
UR_SAMPLER_ADDRESSING_MODE_NONE)) {
270+
AddrMode[i] = CU_TR_ADDRESS_MODE_MIRROR;
271+
}
264272
}
273+
265274
CUfilter_mode FilterMode;
266275
ur_sampler_filter_mode_t FilterModeProp = hSampler->getFilterMode();
267276
FilterMode =
@@ -278,14 +287,15 @@ ur_result_t urTextureCreate(ur_sampler_handle_t hSampler,
278287
ImageTexDesc.minMipmapLevelClamp = hSampler->MinMipmapLevelClamp;
279288
ImageTexDesc.maxAnisotropy = static_cast<unsigned>(hSampler->MaxAnisotropy);
280289

281-
// The address modes can interfere with other dimensionsenqueueEventsWait
290+
// The address modes can interfere with other dimensions
282291
// e.g. 1D texture sampling can be interfered with when setting other
283292
// dimension address modes despite their nonexistence.
284-
ImageTexDesc.addressMode[0] = AddrMode; // 1D
285-
ImageTexDesc.addressMode[1] =
286-
pImageDesc->height > 0 ? AddrMode : ImageTexDesc.addressMode[1]; // 2D
293+
ImageTexDesc.addressMode[0] = AddrMode[0]; // 1D
294+
ImageTexDesc.addressMode[1] = pImageDesc->height > 0
295+
? AddrMode[1]
296+
: ImageTexDesc.addressMode[1]; // 2D
287297
ImageTexDesc.addressMode[2] =
288-
pImageDesc->depth > 0 ? AddrMode : ImageTexDesc.addressMode[2]; // 3D
298+
pImageDesc->depth > 0 ? AddrMode[2] : ImageTexDesc.addressMode[2]; // 3D
289299

290300
// flags takes the normalized coordinates setting -- unnormalized is default
291301
ImageTexDesc.flags = (hSampler->isNormalizedCoords())

source/adapters/cuda/sampler.cpp

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,13 @@ urSamplerCreate(ur_context_handle_t hContext, const ur_sampler_desc_t *pDesc,
3737
Sampler->MaxMipmapLevelClamp = SamplerMipProperties->maxMipmapLevelClamp;
3838
Sampler->MinMipmapLevelClamp = SamplerMipProperties->minMipmapLevelClamp;
3939
Sampler->MaxAnisotropy = SamplerMipProperties->maxAnisotropy;
40-
Sampler->Props |= SamplerMipProperties->mipFilterMode << 5;
40+
Sampler->Props |= SamplerMipProperties->mipFilterMode << 11;
41+
} else if (BaseDesc->stype == UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES) {
42+
const ur_exp_sampler_addr_modes_t *SamplerAddrModes =
43+
reinterpret_cast<const ur_exp_sampler_addr_modes_t *>(pNext);
44+
Sampler->Props |= SamplerAddrModes->addrModes[0] << 2;
45+
Sampler->Props |= SamplerAddrModes->addrModes[1] << 5;
46+
Sampler->Props |= SamplerAddrModes->addrModes[2] << 8;
4147
}
4248
pNext = const_cast<void *>(BaseDesc->pNext);
4349
}

source/adapters/cuda/sampler.hpp

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -15,9 +15,11 @@
1515
/// Sampler property layout:
1616
/// | <bits> | <usage>
1717
/// -----------------------------------
18-
/// | 31 30 ... 6 | N/A
19-
/// | 5 | mip filter mode
20-
/// | 4 3 2 | addressing mode
18+
/// | 31 30 ... 12 | N/A
19+
/// | 11 | mip filter mode
20+
/// | 10 9 8 | addressing mode 3
21+
/// | 7 6 5 | addressing mode 2
22+
/// | 4 3 2 | addressing mode 1
2123
/// | 1 | filter mode
2224
/// | 0 | normalize coords
2325
struct ur_sampler_handle_t_ {
@@ -50,7 +52,12 @@ struct ur_sampler_handle_t_ {
5052
return static_cast<ur_sampler_addressing_mode_t>((Props >> 2) & 0b111);
5153
}
5254

55+
ur_sampler_addressing_mode_t getAddressingModeDim(size_t i) const noexcept {
56+
return static_cast<ur_sampler_addressing_mode_t>((Props >> (2 + (i * 3))) &
57+
0b111);
58+
}
59+
5360
ur_sampler_filter_mode_t getMipFilterMode() const noexcept {
54-
return static_cast<ur_sampler_filter_mode_t>((Props >> 5) & 0b1);
61+
return static_cast<ur_sampler_filter_mode_t>((Props >> 11) & 0b1);
5562
}
5663
};

0 commit comments

Comments
 (0)