@@ -783,24 +783,27 @@ def NVVM_SyncWarpOp :
783783 let assemblyFormat = "$mask attr-dict `:` type($mask)";
784784}
785785
786-
// Diff hunk for NVVM_ElectSyncOp ("elect.sync").
// Lines marked `-` are the previous definition: it declared the
// BasicPtxBuilderOpInterface methods and returned an inline PTX string
// from getPtx().
// Lines marked `+` are the replacement: the interface is dropped and an
// `llvmBuilder` snippet calls llvm::Intrinsic::nvvm_elect_sync instead.
787- def NVVM_ElectSyncOp : NVVM_Op<"elect.sync",
788- [DeclareOpInterfaceMethods<BasicPtxBuilderOpInterface>]>
786+ def NVVM_ElectSyncOp : NVVM_Op<"elect.sync">
789787{
788+ let summary = "Elect one leader thread";
789+ let description = [{
790+ The `elect.sync` instruction elects one predicated active leader
791+ thread from among a set of threads specified in membermask.
792+ The membermask is set to `0xFFFFFFFF` for the current version
793+ of this Op. The predicate result is set to `True` for the
794+ leader thread, and `False` for all other threads.
795+
796+ [For more information, see PTX ISA]
797+ (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-elect-sync)
798+ }];
799+
// The op yields a single I1 result named $pred; the assembly format prints
// only the attribute dictionary and the result type.
790800 let results = (outs I1:$pred);
791801 let assemblyFormat = "attr-dict `->` type(results)";
// Removed implementation: an inline PTX block that first moves 0 into %0,
// executes `elect.sync rx | px, 0xFFFFFFFF`, and then moves 1 into %0 under
// predicate px.
792- let extraClassDefinition = [{
793- std::string $cppClass::getPtx() {
794- return std::string(
795- "{ \n"
796- ".reg .u32 rx; \n"
797- ".reg .pred px; \n"
798- " mov.pred %0, 0; \n"
799- " elect.sync rx | px, 0xFFFFFFFF;\n"
800- "@px mov.pred %0, 1; \n"
801- "}\n"
802- );
803- }
// Added implementation: emits a call to the nvvm_elect_sync intrinsic with a
// constant i32 0xFFFFFFFF argument (the fixed membermask mentioned in the
// description) and binds $pred to element 1 of the returned aggregate.
// NOTE(review): element 0 of the result is unused here; presumably it
// corresponds to the u32 `rx` output of the removed PTX. Confirm against the
// intrinsic's definition.
802+ string llvmBuilder = [{
803+ auto *resultTuple = createIntrinsicCall(builder,
804+ llvm::Intrinsic::nvvm_elect_sync, {builder.getInt32(0xFFFFFFFF)});
805+ // Extract the second value (index 1) of the intrinsic result into $pred
806+ $pred = builder.CreateExtractValue(resultTuple, 1);
804807 }];
805808}
806809
0 commit comments