Skip to content

Commit 2314d06

Browse files
authored
New barrier DXIL Op fixes for Validation and RDAT function compatibility info (microsoft#6291)
Fix barrier allowed ops and flags by shader kind. New barrier operations lacked validation and, for RDAT info, had incorrect min target and shader stage flags. - Identify barrier DXIL operations with new `is_barrier` in `hctdb.py` and generated `OP::IsDxilOpBarrier`. - Identify when a barrier op requires a shader stage with a group (compute-like stage), or when it requires node memory. - Add new `OptFeatureInfo_RequiresGroup` to identify functions only compatible with a shader stage with a visible group for access to groupshared memory or use of group sync. - Translate to original `BarrierMode` when compatible; add `BarrierMode::Invalid` to identify invalid cases. - Account for `DXIL::MemoryTypeFlag::AllMemory` being allowed and auto-masked by the driver. - Properly set min shader model and compatible shader stage flags. - Validate barrier for shader stage. - Add new barriers, which were missing, to counters. Addressing parts of: microsoft#6256 and microsoft#6292. Fixes microsoft#6266.
1 parent 4bfe9c8 commit 2314d06

22 files changed

+598
-60
lines changed

docs/DXIL.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3063,6 +3063,7 @@ INSTR.BARRIERMODEFORNONCS sync in a non-Compute/Amplification/Me
30633063
INSTR.BARRIERMODENOMEMORY sync must include some form of memory barrier - _u (UAV) and/or _g (Thread Group Shared Memory). Only _t (thread group sync) is optional.
30643064
INSTR.BARRIERMODEUSELESSUGROUP sync can't specify both _ugroup and _uglobal. If both are needed, just specify _uglobal.
30653065
INSTR.BARRIERNONCONSTANTFLAGARGUMENT Memory type, access, or sync flag is not constant
3066+
INSTR.BARRIERREQUIRESNODE sync in a non-Node Shader must not sync node record memory.
30663067
INSTR.BUFFERUPDATECOUNTERONRESHASCOUNTER BufferUpdateCounter valid only when HasCounter is true.
30673068
INSTR.BUFFERUPDATECOUNTERONUAV BufferUpdateCounter valid only on UAV.
30683069
INSTR.CALLOLOAD Call to DXIL intrinsic must match overload signature

include/dxc/DXIL/DxilConstants.h

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1484,6 +1484,7 @@ enum class AtomicBinOpCode : unsigned {
14841484

14851485
// Barrier/fence modes.
14861486
enum class BarrierMode : unsigned {
1487+
Invalid = 0,
14871488
SyncThreadGroup = 0x00000001,
14881489
UAVFenceGlobal = 0x00000002,
14891490
UAVFenceThreadGroup = 0x00000004,
@@ -1844,15 +1845,19 @@ enum class MemoryTypeFlag : uint32_t {
18441845
NodeInputMemory = 0x00000004, // NODE_INPUT_MEMORY
18451846
NodeOutputMemory = 0x00000008, // NODE_OUTPUT_MEMORY
18461847
AllMemory = 0x0000000F, // ALL_MEMORY
1847-
ValidMask = 0x0000000F
1848+
ValidMask = 0x0000000F,
1849+
NodeFlags = NodeInputMemory | NodeOutputMemory,
1850+
LegacyFlags = UavMemory | GroupSharedMemory,
1851+
GroupFlags = GroupSharedMemory,
18481852
};
18491853

18501854
// Corresponds to SEMANTIC_FLAG enums in HLSL
18511855
enum class BarrierSemanticFlag : uint32_t {
18521856
GroupSync = 0x00000001, // GROUP_SYNC
18531857
GroupScope = 0x00000002, // GROUP_SCOPE
18541858
DeviceScope = 0x00000004, // DEVICE_SCOPE
1855-
ValidMask = 0x00000007
1859+
ValidMask = 0x00000007,
1860+
GroupFlags = GroupSync | GroupScope,
18561861
};
18571862

18581863
// Constant for Container.
@@ -1940,8 +1945,12 @@ static_assert(ShaderFeatureInfoCount <= 40,
19401945
// support it, or to determine when the flag
19411946
// ShaderFeatureInfo_DerivativesInMeshAndAmpShaders is required.
19421947
const uint64_t OptFeatureInfo_UsesDerivatives = 0x0000010000000000ULL;
1948+
// OptFeatureInfo_RequiresGroup tracks whether a function requires a visible
1949+
// group that supports things like groupshared memory and group sync.
1950+
const uint64_t OptFeatureInfo_RequiresGroup = 0x0000020000000000ULL;
1951+
19431952
const uint64_t OptFeatureInfoShift = 40;
1944-
const unsigned OptFeatureInfoCount = 1;
1953+
const unsigned OptFeatureInfoCount = 2;
19451954
static_assert(OptFeatureInfoCount <= 23,
19461955
"OptFeatureInfo flags must fit in 23 bits; after that we need to "
19471956
"expand the FeatureInfo blob part and start defining a new set "

include/dxc/DXIL/DxilOperations.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,10 @@ class OP {
125125
static bool IsDxilOpWave(OpCode C);
126126
static bool IsDxilOpGradient(OpCode C);
127127
static bool IsDxilOpFeedback(OpCode C);
128+
static bool IsDxilOpBarrier(OpCode C);
129+
static bool BarrierRequiresGroup(const llvm::CallInst *CI);
130+
static bool BarrierRequiresNode(const llvm::CallInst *CI);
131+
static DXIL::BarrierMode TranslateToBarrierMode(const llvm::CallInst *CI);
128132
static bool IsDxilOpTypeName(llvm::StringRef name);
129133
static bool IsDxilOpType(llvm::StructType *ST);
130134
static bool IsDupDxilOpType(llvm::StructType *ST);

include/dxc/DXIL/DxilShaderFlags.h

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ class ShaderFlags {
3939
void SetShaderFlagsRaw(uint64_t data);
4040
void CombineShaderFlags(const ShaderFlags &other);
4141

42+
void ClearLocalFlags();
43+
4244
void SetDisableOptimizations(bool flag) { m_bDisableOptimizations = flag; }
4345
bool GetDisableOptimizations() const { return m_bDisableOptimizations; }
4446

@@ -214,10 +216,13 @@ class ShaderFlags {
214216
void SetWaveMMA(bool flag) { m_bWaveMMA = flag; }
215217
bool GetWaveMMA() const { return m_bWaveMMA; }
216218

217-
// Per-function flag
219+
// Per-function flags
218220
void SetUsesDerivatives(bool flag) { m_bUsesDerivatives = flag; }
219221
bool GetUsesDerivatives() const { return m_bUsesDerivatives; }
220222

223+
void SetRequiresGroup(bool flag) { m_bRequiresGroup = flag; }
224+
bool GetRequiresGroup() const { return m_bRequiresGroup; }
225+
221226
private:
222227
// Bit: 0
223228
unsigned
@@ -331,12 +336,20 @@ class ShaderFlags {
331336
m_bSampleCmpGradientOrBias : 1; // SHADER_FEATURE_SAMPLE_CMP_GRADIENT_OR_BIAS
332337
unsigned m_bExtendedCommandInfo : 1; // SHADER_FEATURE_EXTENDED_COMMAND_INFO
333338

334-
// Per-function flag
339+
// Per-function flags
335340
// Bit: 39
336341
unsigned m_bUsesDerivatives : 1; // SHADER_FEATURE_OPT_USES_DERIVATIVES
337342
// (OptFeatureInfo_UsesDerivatives)
338343

339-
uint32_t m_align1 : 24; // align to 64 bit.
344+
// m_bRequiresGroup indicates that the function requires a visible group.
345+
// For instance, to access group shared memory or use group sync.
346+
// This is necessary because shader stage is insufficient to indicate group
347+
// availability with the advent of thread launch node shaders.
348+
// Bit: 40
349+
unsigned m_bRequiresGroup : 1; // SHADER_FEATURE_OPT_REQUIRES_GROUP
350+
// (OptFeatureInfo_RequiresGroup)
351+
352+
uint32_t m_align1 : 23; // align to 64 bit.
340353
};
341354

342355
} // namespace hlsl

include/dxc/DxilContainer/RDAT_LibraryTypes.inl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,10 +84,11 @@ RDAT_ENUM_START(DxilFeatureInfo2, uint32_t)
8484
RDAT_ENUM_VALUE(ExtendedCommandInfo, 0x1)
8585
// OptFeatureInfo flags
8686
RDAT_ENUM_VALUE(Opt_UsesDerivatives, 0x100)
87+
RDAT_ENUM_VALUE(Opt_RequiresGroup, 0x200)
8788
#if DEF_RDAT_ENUMS == DEF_RDAT_DUMP_IMPL
8889
static_assert(DXIL::ShaderFeatureInfoCount == 33,
8990
"otherwise, RDAT_ENUM definition needs updating");
90-
static_assert(DXIL::OptFeatureInfoCount == 1,
91+
static_assert(DXIL::OptFeatureInfoCount == 2,
9192
"otherwise, RDAT_ENUM definition needs updating");
9293
#endif
9394
RDAT_ENUM_END()

lib/DXIL/DxilModule.cpp

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -293,15 +293,15 @@ void DxilModule::CollectShaderFlagsForModule(ShaderFlags &Flags) {
293293
for (auto &itInfo : m_FuncToShaderCompat)
294294
Flags.CombineShaderFlags(itInfo.second.shaderFlags);
295295

296-
// Clear UsesDerivatives flag for module, making sure
297-
// DerivativesInMeshAndAmpShaders is set for MS/AS.
296+
const ShaderModel *SM = GetShaderModel();
297+
298+
// Set DerivativesInMeshAndAmpShaders if necessary for MS/AS.
298299
if (Flags.GetUsesDerivatives()) {
299-
Flags.SetUsesDerivatives(false);
300-
if (m_pSM->IsMS() || m_pSM->IsAS())
300+
if (SM->IsMS() || SM->IsAS())
301301
Flags.SetDerivativesInMeshAndAmpShaders(true);
302302
}
303-
304-
const ShaderModel *SM = GetShaderModel();
303+
// Clear function-local flags not intended for the module.
304+
Flags.ClearLocalFlags();
305305

306306
unsigned NumUAVs = 0;
307307
const unsigned kSmallUAVCount = 8;
@@ -2117,11 +2117,11 @@ bool DxilModule::ShaderCompatInfo::Merge(ShaderCompatInfo &other) {
21172117
// Compare that minimum required version to the values passed in with
21182118
// `minMajor` and `minMinor` and pass the maximum of those back through those
21192119
// same variables.
2120-
// Return adjusted `ShaderFlags` according to `props` set.
2121-
static ShaderFlags
2122-
AdjustMinimumShaderModelAndFlags(ShaderFlags flags,
2123-
const DxilFunctionProps *props,
2124-
unsigned &minMajor, unsigned &minMinor) {
2120+
// Adjusts `ShaderFlags` argument according to `props` set.
2121+
static void AdjustMinimumShaderModelAndFlags(const DxilFunctionProps *props,
2122+
ShaderFlags &flags,
2123+
unsigned &minMajor,
2124+
unsigned &minMinor) {
21252125
// Adjust flags based on DxilFunctionProps and compute minimum shader model.
21262126
// Library functions use flags to capture properties that may or may not be
21272127
// used in the final shader, depending on that final shader's shader model.
@@ -2189,8 +2189,6 @@ AdjustMinimumShaderModelAndFlags(ShaderFlags flags,
21892189
DXIL::UpdateToMaxOfVersions(minMajor, minMinor, 6, 2);
21902190
else if (flags.GetViewID() || flags.GetBarycentrics())
21912191
DXIL::UpdateToMaxOfVersions(minMajor, minMinor, 6, 1);
2192-
2193-
return flags;
21942192
}
21952193

21962194
void DxilModule::ComputeShaderCompatInfo() {
@@ -2277,8 +2275,8 @@ void DxilModule::ComputeShaderCompatInfo() {
22772275
ShaderFlags &flags = info.shaderFlags;
22782276
if (dxil18Plus) {
22792277
// This handles WaveSize requirement as well.
2280-
flags = AdjustMinimumShaderModelAndFlags(flags, props, info.minMajor,
2281-
info.minMinor);
2278+
AdjustMinimumShaderModelAndFlags(props, flags, info.minMajor,
2279+
info.minMinor);
22822280
} else {
22832281
// Match prior versions that were missing some feature detection.
22842282
if (flags.GetUseNativeLowPrecision() && flags.GetLowPrecisionPresent())

lib/DXIL/DxilOperations.cpp

Lines changed: 160 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2876,6 +2876,135 @@ bool OP::IsDxilOpFeedback(OpCode C) {
28762876
// OPCODE-FEEDBACK:END
28772877
}
28782878

// Returns true when opcode C is one of the barrier operations:
// Barrier (80), BarrierByMemoryType (244), BarrierByMemoryHandle (245), or
// BarrierByNodeRecordHandle (246).
// The statements between OPCODE-BARRIER:BEGIN and OPCODE-BARRIER:END are
// produced by the hctdb_instrhelp.get_instrs_pred("op", "is_barrier") directive
// below; NOTE(review): presumably changes belong in hctdb rather than in this
// generated region - confirm before hand-editing.
2879+
bool OP::IsDxilOpBarrier(OpCode C) {
2880+
unsigned op = (unsigned)C;
2881+
// clang-format off
2882+
// Python lines need to be not formatted.
2883+
/* <py::lines('OPCODE-BARRIER')>hctdb_instrhelp.get_instrs_pred("op", "is_barrier")</py>*/
2884+
// clang-format on
2885+
// OPCODE-BARRIER:BEGIN
2886+
// Instructions: Barrier=80, BarrierByMemoryType=244,
2887+
// BarrierByMemoryHandle=245, BarrierByNodeRecordHandle=246
2888+
return op == 80 || (244 <= op && op <= 246);
2889+
// OPCODE-BARRIER:END
2890+
}
2891+
// Applies allowedMask to memoryTypeFlags only when the flags are exactly
// MemoryTypeFlag::AllMemory; any other value is returned unchanged.
//   memoryTypeFlags - raw memory type flag bits from a barrier operand.
//   allowedMask     - bits to keep when the input is AllMemory.
// Returns the (possibly masked) flag bits. Callers use this so that AllMemory
// by itself does not count as an explicit request for a memory type that the
// caller wants to exclude.
2892+
static unsigned MaskMemoryTypeFlagsIfAllowed(unsigned memoryTypeFlags,
2893+
unsigned allowedMask) {
2894+
// If the memory type is AllMemory, masking inapplicable flags is allowed.
2895+
if (memoryTypeFlags != (unsigned)DXIL::MemoryTypeFlag::AllMemory)
2896+
return memoryTypeFlags;
2897+
return memoryTypeFlags & allowedMask;
2898+
}
2899+
// Reports whether the barrier call CI needs a visible thread group.
//   - Barrier: true for any constant barrierMode other than exactly
//     UAVFenceGlobal; false when the mode operand is not a constant.
//   - BarrierByMemoryType: true when the constant memory type flags still
//     contain MemoryTypeFlag::GroupFlags after AllMemory masking; otherwise
//     the decision falls through to the semantic flag check below.
//   - BarrierByMemoryHandle / BarrierByNodeRecordHandle (and the fallthrough
//     from BarrierByMemoryType): true when the constant semantic flags contain
//     any bit of BarrierSemanticFlag::GroupFlags.
//   - Any other opcode: false.
// Non-constant flag operands never cause a true result here.
2900+
bool OP::BarrierRequiresGroup(const llvm::CallInst *CI) {
2901+
OpCode opcode = OP::GetDxilOpFuncCallInst(CI);
2902+
switch (opcode) {
2903+
case OpCode::Barrier: {
2904+
DxilInst_Barrier barrier(const_cast<CallInst *>(CI));
2905+
if (isa<ConstantInt>(barrier.get_barrierMode())) {
2906+
unsigned mode = barrier.get_barrierMode_val();
2907+
return (mode != (unsigned)DXIL::BarrierMode::UAVFenceGlobal);
2908+
}
// Non-constant mode: no group requirement is reported.
2909+
return false;
2910+
}
2911+
case OpCode::BarrierByMemoryType: {
2912+
DxilInst_BarrierByMemoryType barrier(const_cast<CallInst *>(CI));
2913+
if (isa<ConstantInt>(barrier.get_MemoryTypeFlags())) {
2914+
unsigned memoryTypeFlags = barrier.get_MemoryTypeFlags_val();
// AllMemory has its group bits stripped here, so AllMemory alone does not
// require a group; only explicitly specified group flags do.
2915+
memoryTypeFlags = MaskMemoryTypeFlagsIfAllowed(
2916+
memoryTypeFlags, ~(unsigned)DXIL::MemoryTypeFlag::GroupFlags);
2917+
if (memoryTypeFlags & (unsigned)DXIL::MemoryTypeFlag::GroupFlags)
2918+
return true;
2919+
}
2920+
}
// Memory type did not require a group; continue with the semantic flags.
2921+
LLVM_FALLTHROUGH;
2922+
case OpCode::BarrierByMemoryHandle:
2923+
case OpCode::BarrierByNodeRecordHandle: {
2924+
// BarrierByMemoryType, BarrierByMemoryHandle, and BarrierByNodeRecordHandle
2925+
// all have semanticFlags as the second operand.
2926+
DxilInst_BarrierByMemoryType barrier(const_cast<CallInst *>(CI));
2927+
if (isa<ConstantInt>(barrier.get_SemanticFlags())) {
2928+
unsigned semanticFlags = barrier.get_SemanticFlags_val();
2929+
if (semanticFlags & (unsigned)DXIL::BarrierSemanticFlag::GroupFlags)
2930+
return true;
2931+
}
2932+
return false;
2933+
}
2934+
default:
2935+
return false;
2936+
}
2937+
}
2938+
// Reports whether the barrier call CI needs node memory.
//   - BarrierByNodeRecordHandle: always true.
//   - BarrierByMemoryType: true when the constant memory type flags still
//     contain MemoryTypeFlag::NodeFlags after AllMemory masking; false when
//     the flags operand is not a constant.
//   - Any other opcode: false.
2939+
bool OP::BarrierRequiresNode(const llvm::CallInst *CI) {
2940+
OpCode opcode = OP::GetDxilOpFuncCallInst(CI);
2941+
switch (opcode) {
2942+
case OpCode::BarrierByNodeRecordHandle:
2943+
return true;
2944+
case OpCode::BarrierByMemoryType: {
2945+
DxilInst_BarrierByMemoryType barrier(const_cast<CallInst *>(CI));
2946+
if (isa<ConstantInt>(barrier.get_MemoryTypeFlags())) {
2947+
unsigned memoryTypeFlags = barrier.get_MemoryTypeFlags_val();
2948+
// Mask off node flags, if allowed.
// (Only AllMemory is masked, so AllMemory alone does not require node.)
2949+
memoryTypeFlags = MaskMemoryTypeFlagsIfAllowed(
2950+
memoryTypeFlags, ~(unsigned)DXIL::MemoryTypeFlag::NodeFlags);
2951+
return (memoryTypeFlags & (unsigned)DXIL::MemoryTypeFlag::NodeFlags) != 0;
2952+
}
2953+
return false;
2954+
}
2955+
default:
2956+
return false;
2957+
}
2958+
}
2959+
// Translates the barrier call CI to a DXIL::BarrierMode bit combination.
//   - Barrier: returns the constant barrierMode operand cast to BarrierMode,
//     or BarrierMode::Invalid when the operand is not a constant.
//   - BarrierByMemoryType: builds a mode from the memory type and semantic
//     flags. A non-constant operand is treated as 0. Returns Invalid when
//     flags outside MemoryTypeFlag::LegacyFlags remain after AllMemory
//     masking. Mapping:
//       GroupSharedMemory          -> TGSMFence
//       UavMemory + DeviceScope    -> UAVFenceGlobal
//       UavMemory + GroupScope     -> UAVFenceThreadGroup (only when
//                                     DeviceScope is not set)
//       GroupSync                  -> SyncThreadGroup
//     When no bit ends up set the result is 0, which is the value of
//     BarrierMode::Invalid.
//   - Any other opcode: BarrierMode::Invalid.
2960+
DXIL::BarrierMode OP::TranslateToBarrierMode(const llvm::CallInst *CI) {
2961+
OpCode opcode = OP::GetDxilOpFuncCallInst(CI);
2962+
switch (opcode) {
2963+
case OpCode::Barrier: {
2964+
DxilInst_Barrier barrier(const_cast<CallInst *>(CI));
2965+
if (isa<ConstantInt>(barrier.get_barrierMode())) {
2966+
unsigned mode = barrier.get_barrierMode_val();
2967+
return static_cast<DXIL::BarrierMode>(mode);
2968+
}
2969+
return DXIL::BarrierMode::Invalid;
2970+
}
2971+
case OpCode::BarrierByMemoryType: {
// Both values stay 0 when the corresponding operand is not a constant.
2972+
unsigned memoryTypeFlags = 0;
2973+
unsigned semanticFlags = 0;
2974+
DxilInst_BarrierByMemoryType barrier(const_cast<CallInst *>(CI));
2975+
if (isa<ConstantInt>(barrier.get_MemoryTypeFlags())) {
2976+
memoryTypeFlags = barrier.get_MemoryTypeFlags_val();
2977+
}
2978+
if (isa<ConstantInt>(barrier.get_SemanticFlags())) {
2979+
semanticFlags = barrier.get_SemanticFlags_val();
2980+
}
2981+
2982+
// Mask to legacy flags, if allowed.
2983+
memoryTypeFlags = MaskMemoryTypeFlagsIfAllowed(
2984+
memoryTypeFlags, (unsigned)DXIL::MemoryTypeFlag::LegacyFlags);
// Any remaining non-legacy flag was requested explicitly and has no
// BarrierMode equivalent.
2985+
if (memoryTypeFlags & ~(unsigned)DXIL::MemoryTypeFlag::LegacyFlags)
2986+
return DXIL::BarrierMode::Invalid;
2987+
2988+
unsigned mode = 0;
2989+
if (memoryTypeFlags & (unsigned)DXIL::MemoryTypeFlag::GroupSharedMemory)
2990+
mode |= (unsigned)DXIL::BarrierMode::TGSMFence;
2991+
if (memoryTypeFlags & (unsigned)DXIL::MemoryTypeFlag::UavMemory) {
// DeviceScope takes precedence over GroupScope when both are set.
2992+
if (semanticFlags & (unsigned)DXIL::BarrierSemanticFlag::DeviceScope) {
2993+
mode |= (unsigned)DXIL::BarrierMode::UAVFenceGlobal;
2994+
} else if (semanticFlags &
2995+
(unsigned)DXIL::BarrierSemanticFlag::GroupScope) {
2996+
mode |= (unsigned)DXIL::BarrierMode::UAVFenceThreadGroup;
2997+
}
2998+
}
2999+
if (semanticFlags & (unsigned)DXIL::BarrierSemanticFlag::GroupSync)
3000+
mode |= (unsigned)DXIL::BarrierMode::SyncThreadGroup;
3001+
return static_cast<DXIL::BarrierMode>(mode);
3002+
}
3003+
default:
3004+
return DXIL::BarrierMode::Invalid;
3005+
}
3006+
}
3007+
28793008
#define SFLAG(stage) ((unsigned)1 << (unsigned)DXIL::ShaderKind::stage)
28803009
void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation,
28813010
unsigned &major, unsigned &minor,
@@ -3168,9 +3297,8 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation,
31683297
SFLAG(Mesh) | SFLAG(Pixel) | SFLAG(Node);
31693298
return;
31703299
}
3171-
// Instructions: BarrierByMemoryType=244, BarrierByMemoryHandle=245,
3172-
// BarrierByNodeRecordHandle=246, SampleCmpGrad=254
3173-
if ((244 <= op && op <= 246) || op == 254) {
3300+
// Instructions: BarrierByMemoryHandle=245, SampleCmpGrad=254
3301+
if (op == 245 || op == 254) {
31743302
major = 6;
31753303
minor = 8;
31763304
return;
@@ -3185,11 +3313,11 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation,
31853313
}
31863314
// Instructions: AllocateNodeOutputRecords=238, GetNodeRecordPtr=239,
31873315
// IncrementOutputCount=240, OutputComplete=241, GetInputRecordCount=242,
3188-
// FinishedCrossGroupSharing=243, CreateNodeOutputHandle=247,
3189-
// IndexNodeHandle=248, AnnotateNodeHandle=249,
3316+
// FinishedCrossGroupSharing=243, BarrierByNodeRecordHandle=246,
3317+
// CreateNodeOutputHandle=247, IndexNodeHandle=248, AnnotateNodeHandle=249,
31903318
// CreateNodeInputRecordHandle=250, AnnotateNodeRecordHandle=251,
31913319
// NodeOutputIsValid=252, GetRemainingRecursionLevels=253
3192-
if ((238 <= op && op <= 243) || (247 <= op && op <= 253)) {
3320+
if ((238 <= op && op <= 243) || (246 <= op && op <= 253)) {
31933321
major = 6;
31943322
minor = 8;
31953323
mask = SFLAG(Node);
@@ -3202,6 +3330,17 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation,
32023330
mask = SFLAG(Vertex);
32033331
return;
32043332
}
3333+
// Instructions: BarrierByMemoryType=244
3334+
if (op == 244) {
3335+
if (bWithTranslation) {
3336+
major = 6;
3337+
minor = 0;
3338+
} else {
3339+
major = 6;
3340+
minor = 8;
3341+
}
3342+
return;
3343+
}
32053344
// Instructions: WaveMatrix_Annotate=226, WaveMatrix_Depth=227,
32063345
// WaveMatrix_Fill=228, WaveMatrix_LoadRawBuf=229,
32073346
// WaveMatrix_LoadGroupShared=230, WaveMatrix_StoreRawBuf=231,
@@ -3249,20 +3388,23 @@ void OP::GetMinShaderModelAndMask(const llvm::CallInst *CI,
32493388

32503389
// Additional rules are applied manually here.
32513390

3252-
// Barrier with mode != UAVFenceGlobal requires compute, amplification,
3253-
// mesh, or node. Instructions: Barrier=80
3254-
if (opcode == DXIL::OpCode::Barrier) {
3255-
// Barrier mode should be a constant, but be robust to non-constants here.
3256-
if (isa<ConstantInt>(
3257-
CI->getArgOperand(DxilInst_Barrier::arg_barrierMode))) {
3258-
DxilInst_Barrier barrier(const_cast<CallInst *>(CI));
3259-
unsigned mode = barrier.get_barrierMode_val();
3260-
if (mode != (unsigned)DXIL::BarrierMode::UAVFenceGlobal) {
3261-
mask &= SFLAG(Library) | SFLAG(Compute) | SFLAG(Amplification) |
3262-
SFLAG(Mesh) | SFLAG(Node);
3391+
// Barrier requiring node or group limit shader kinds.
3392+
if (IsDxilOpBarrier(opcode)) {
3393+
// If BarrierByMemoryType, check if translatable, or set min to 6.8.
3394+
if (bWithTranslation && opcode == DXIL::OpCode::BarrierByMemoryType) {
3395+
if (TranslateToBarrierMode(CI) == DXIL::BarrierMode::Invalid) {
3396+
major = 6;
3397+
minor = 8;
32633398
}
32643399
}
3265-
return;
3400+
if (BarrierRequiresNode(CI)) {
3401+
mask &= SFLAG(Library) | SFLAG(Node);
3402+
return;
3403+
} else if (BarrierRequiresGroup(CI)) {
3404+
mask &= SFLAG(Library) | SFLAG(Compute) | SFLAG(Amplification) |
3405+
SFLAG(Mesh) | SFLAG(Node);
3406+
return;
3407+
}
32663408
}
32673409

32683410
// 64-bit integer atomic ops require 6.6

0 commit comments

Comments
 (0)