[OpenMP] Codegen support for masked combined construct #120520
Conversation
@llvm/pr-subscribers-clang-codegen @llvm/pr-subscribers-clang

Author: CHANDRA GHALE (chandraghale)

Changes: Added codegen support for the combined masked constructs, implementing EmitOMPMaskedTaskLoopDirective, EmitOMPMaskedTaskLoopSimdDirective, EmitOMPParallelMaskedTaskLoopDirective, and EmitOMPParallelMaskedTaskLoopSimdDirective.

Patch is 41.77 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/120520.diff

4 Files Affected:
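For context, a minimal use of one of the newly supported combined constructs — a sketch mirroring the test file added in this patch (N and the empty loop body follow that test):

#include <omp.h>
#define N 100

void parallel_masked_taskloop(void) {
  // Only the filtered thread of the team (by default the primary thread)
  // encounters the taskloop; the generated loop tasks are then scheduled
  // across the team.
  #pragma omp parallel masked taskloop
  for (int i = 0; i < N; i++)
    ;
}
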
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 6c7a594fb10c4c..3424f1f30c61ef 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -331,29 +331,31 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
EmitOMPMasterTaskLoopDirective(cast<OMPMasterTaskLoopDirective>(*S));
break;
case Stmt::OMPMaskedTaskLoopDirectiveClass:
- llvm_unreachable("masked taskloop directive not supported yet.");
+ EmitOMPMaskedTaskLoopDirective(cast<OMPMaskedTaskLoopDirective>(*S));
break;
case Stmt::OMPMasterTaskLoopSimdDirectiveClass:
EmitOMPMasterTaskLoopSimdDirective(
cast<OMPMasterTaskLoopSimdDirective>(*S));
break;
case Stmt::OMPMaskedTaskLoopSimdDirectiveClass:
- llvm_unreachable("masked taskloop simd directive not supported yet.");
+ EmitOMPMaskedTaskLoopSimdDirective(
+ cast<OMPMaskedTaskLoopSimdDirective>(*S));
break;
case Stmt::OMPParallelMasterTaskLoopDirectiveClass:
EmitOMPParallelMasterTaskLoopDirective(
cast<OMPParallelMasterTaskLoopDirective>(*S));
break;
case Stmt::OMPParallelMaskedTaskLoopDirectiveClass:
- llvm_unreachable("parallel masked taskloop directive not supported yet.");
+ EmitOMPParallelMaskedTaskLoopDirective(
+ cast<OMPParallelMaskedTaskLoopDirective>(*S));
break;
case Stmt::OMPParallelMasterTaskLoopSimdDirectiveClass:
EmitOMPParallelMasterTaskLoopSimdDirective(
cast<OMPParallelMasterTaskLoopSimdDirective>(*S));
break;
case Stmt::OMPParallelMaskedTaskLoopSimdDirectiveClass:
- llvm_unreachable(
- "parallel masked taskloop simd directive not supported yet.");
+ EmitOMPParallelMaskedTaskLoopSimdDirective(
+ cast<OMPParallelMaskedTaskLoopSimdDirective>(*S));
break;
case Stmt::OMPDistributeDirectiveClass:
EmitOMPDistributeDirective(cast<OMPDistributeDirective>(*S));
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 6cb37b20b7aeee..0f41e11953a948 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -7994,6 +7994,19 @@ void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}
+void CodeGenFunction::EmitOMPMaskedTaskLoopDirective(
+ const OMPMaskedTaskLoopDirective &S) {
+ auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
+ EmitOMPTaskLoopBasedDirective(S);
+ };
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+ OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
+ CGM.getOpenMPRuntime().emitMaskedRegion(*this, CodeGen, S.getBeginLoc());
+}
+
+
void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
const OMPMasterTaskLoopSimdDirective &S) {
auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
@@ -8006,6 +8019,19 @@ void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}
+void CodeGenFunction::EmitOMPMaskedTaskLoopSimdDirective(
+ const OMPMaskedTaskLoopSimdDirective &S) {
+ auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
+ EmitOMPTaskLoopBasedDirective(S);
+ };
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+ OMPLexicalScope Scope(*this, S);
+ CGM.getOpenMPRuntime().emitMaskedRegion(*this, CodeGen, S.getBeginLoc());
+}
+
+
void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
const OMPParallelMasterTaskLoopDirective &S) {
auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
@@ -8024,6 +8050,25 @@ void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
emitEmptyBoundParameters);
}
+void CodeGenFunction::EmitOMPParallelMaskedTaskLoopDirective(
+ const OMPParallelMaskedTaskLoopDirective &S) {
+ auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
+ PrePostActionTy &Action) {
+ Action.Enter(CGF);
+ CGF.EmitOMPTaskLoopBasedDirective(S);
+ };
+ OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
+ CGM.getOpenMPRuntime().emitMaskedRegion(CGF, TaskLoopCodeGen,
+ S.getBeginLoc());
+ };
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+ emitCommonOMPParallelDirective(*this, S, OMPD_masked_taskloop, CodeGen,
+ emitEmptyBoundParameters);
+}
+
+
void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
const OMPParallelMasterTaskLoopSimdDirective &S) {
auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
@@ -8042,6 +8087,25 @@ void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
emitEmptyBoundParameters);
}
+void CodeGenFunction::EmitOMPParallelMaskedTaskLoopSimdDirective(
+ const OMPParallelMaskedTaskLoopSimdDirective &S) {
+ auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
+ PrePostActionTy &Action) {
+ Action.Enter(CGF);
+ CGF.EmitOMPTaskLoopBasedDirective(S);
+ };
+ OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
+ CGM.getOpenMPRuntime().emitMaskedRegion(CGF, TaskLoopCodeGen,
+ S.getBeginLoc());
+ };
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+ emitCommonOMPParallelDirective(*this, S, OMPD_masked_taskloop_simd, CodeGen,
+ emitEmptyBoundParameters);
+}
+
+
// Generate the instructions for '#pragma omp target update' directive.
void CodeGenFunction::EmitOMPTargetUpdateDirective(
const OMPTargetUpdateDirective &S) {
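
The four emitters added above share one shape: the taskloop codegen callback is wrapped in emitMaskedRegion (and, for the parallel variants, additionally in emitCommonOMPParallelDirective). At the IR level this yields the runtime-call pattern the new test checks below; here is a hedged C sketch of that shape (the prototypes are simplified stand-ins, not the real kmp.h declarations):

typedef struct ident ident_t;
extern int  __kmpc_masked(ident_t *loc, int gtid, int filter);
extern void __kmpc_end_masked(ident_t *loc, int gtid);
extern void __kmpc_taskgroup(ident_t *loc, int gtid);
extern void __kmpc_end_taskgroup(ident_t *loc, int gtid);

static void masked_taskloop_shape(ident_t *loc, int gtid) {
  // __kmpc_masked returns nonzero only for the filtered thread, so the
  // taskloop setup runs once; other threads skip to the join point.
  if (__kmpc_masked(loc, gtid, /*filter=*/0)) {
    __kmpc_taskgroup(loc, gtid);
    /* __kmpc_omp_task_alloc + __kmpc_taskloop allocate the task and
       dispatch the loop iterations as tasks (see CHECK lines below) */
    __kmpc_end_taskgroup(loc, gtid);
    __kmpc_end_masked(loc, gtid);
  }
}
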
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 092d55355a0a17..1ecf9fb8e6a5b4 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3863,12 +3863,19 @@ class CodeGenFunction : public CodeGenTypeCache {
void EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S);
void EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective &S);
void EmitOMPMasterTaskLoopDirective(const OMPMasterTaskLoopDirective &S);
+ void EmitOMPMaskedTaskLoopDirective(const OMPMaskedTaskLoopDirective &S);
void
EmitOMPMasterTaskLoopSimdDirective(const OMPMasterTaskLoopSimdDirective &S);
+ void
+ EmitOMPMaskedTaskLoopSimdDirective(const OMPMaskedTaskLoopSimdDirective &S);
void EmitOMPParallelMasterTaskLoopDirective(
const OMPParallelMasterTaskLoopDirective &S);
+ void EmitOMPParallelMaskedTaskLoopDirective(
+ const OMPParallelMaskedTaskLoopDirective &S);
void EmitOMPParallelMasterTaskLoopSimdDirective(
const OMPParallelMasterTaskLoopSimdDirective &S);
+ void EmitOMPParallelMaskedTaskLoopSimdDirective(
+ const OMPParallelMaskedTaskLoopSimdDirective &S);
void EmitOMPDistributeDirective(const OMPDistributeDirective &S);
void EmitOMPDistributeParallelForDirective(
const OMPDistributeParallelForDirective &S);
diff --git a/clang/test/OpenMP/combined_masked.c b/clang/test/OpenMP/combined_masked.c
new file mode 100644
index 00000000000000..2e90fa9f17f295
--- /dev/null
+++ b/clang/test/OpenMP/combined_masked.c
@@ -0,0 +1,486 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --prefix-filecheck-ir-name _ --version 5
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=52 -x c -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+#define N 100
+void parallel_masked_taskloop(){
+ #pragma omp parallel masked taskloop
+ for( int i = 0; i < N; i++)
+ ;
+
+}
+void parallel_masked_taskloop_simd(){
+ #pragma omp parallel masked taskloop simd
+ for( int i = 0; i < N; i++)
+ ;
+
+}
+void masked_taskloop(){
+ #pragma omp masked taskloop
+ for( int i = 0; i < N; i++)
+ ;
+
+}
+void masked_taskloop_simd(){
+ #pragma omp masked taskloop simd
+ for( int i = 0; i < N; i++)
+ ;
+
+}
+
+
+int main()
+{
+ parallel_masked_taskloop();
+ parallel_masked_taskloop_simd();
+ masked_taskloop();
+ masked_taskloop_simd();
+
+}
+// CHECK-LABEL: define dso_local void @parallel_masked_taskloop(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @parallel_masked_taskloop.omp_outlined)
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define internal void @parallel_masked_taskloop.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
+// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP1]], i32 0)
+// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+// CHECK-NEXT: br i1 [[TMP3]], label %[[OMP_IF_THEN:.*]], label %[[OMP_IF_END:.*]]
+// CHECK: [[OMP_IF_THEN]]:
+// CHECK-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]])
+// CHECK-NEXT: [[TMP4:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 0, ptr @.omp_task_entry.)
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP4]], i32 0, i32 0
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP5]], i32 0, i32 5
+// CHECK-NEXT: store i64 0, ptr [[TMP6]], align 8
+// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T]], ptr [[TMP5]], i32 0, i32 6
+// CHECK-NEXT: store i64 99, ptr [[TMP7]], align 8
+// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T]], ptr [[TMP5]], i32 0, i32 7
+// CHECK-NEXT: store i64 1, ptr [[TMP8]], align 8
+// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T]], ptr [[TMP5]], i32 0, i32 9
+// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 8, i1 false)
+// CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
+// CHECK-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP4]], i32 1, ptr [[TMP6]], ptr [[TMP7]], i64 [[TMP10]], i32 1, i32 0, i64 0, ptr null)
+// CHECK-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]])
+// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP1]])
+// CHECK-NEXT: br label %[[OMP_IF_END]]
+// CHECK: [[OMP_IF_END]]:
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define internal i32 @.omp_task_entry.(
+// CHECK-SAME: i32 noundef [[TMP0:%.*]], ptr noalias noundef [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[DOTLB__ADDR_I:%.*]] = alloca i64, align 8
+// CHECK-NEXT: [[DOTUB__ADDR_I:%.*]] = alloca i64, align 8
+// CHECK-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8
+// CHECK-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[I_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store i32 [[TMP0]], ptr [[DOTADDR]], align 4
+// CHECK-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP3]], i32 0, i32 0
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0
+// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 5
+// CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8
+// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 6
+// CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8
+// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 7
+// CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP12]], align 8
+// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 8
+// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 8
+// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 9
+// CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]])
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]])
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]])
+// CHECK-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META14:![0-9]+]]
+// CHECK-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK-NEXT: store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK-NEXT: store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK-NEXT: store i64 [[TMP9]], ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK-NEXT: store i64 [[TMP11]], ptr [[DOTUB__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK-NEXT: store i64 [[TMP13]], ptr [[DOTST__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK-NEXT: store i32 [[TMP15]], ptr [[DOTLITER__ADDR_I]], align 4, !noalias [[META14]]
+// CHECK-NEXT: store ptr [[TMP17]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META14]]
+// CHECK-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META14]]
+// CHECK-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP19]] to i32
+// CHECK-NEXT: store i32 [[CONV_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias [[META14]]
+// CHECK-NEXT: br label %[[OMP_INNER_FOR_COND_I:.*]]
+// CHECK: [[OMP_INNER_FOR_COND_I]]:
+// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META14]]
+// CHECK-NEXT: [[CONV1_I:%.*]] = sext i32 [[TMP20]] to i64
+// CHECK-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTUB__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV1_I]], [[TMP21]]
+// CHECK-NEXT: br i1 [[CMP_I]], label %[[OMP_INNER_FOR_BODY_I:.*]], [[DOTOMP_OUTLINED__EXIT:label %.*]]
+// CHECK: [[OMP_INNER_FOR_BODY_I]]:
+// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META14]]
+// CHECK-NEXT: store i32 [[TMP22]], ptr [[I_I]], align 4, !noalias [[META14]]
+// CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META14]]
+// CHECK-NEXT: [[ADD3_I:%.*]] = add nsw i32 [[TMP23]], 1
+// CHECK-NEXT: store i32 [[ADD3_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias [[META14]]
+// CHECK-NEXT: br label %[[OMP_INNER_FOR_COND_I]]
+// CHECK: [[_OMP_OUTLINED__EXIT:.*:]]
+// CHECK-NEXT: ret i32 0
+//
+//
+// CHECK-LABEL: define dso_local void @parallel_masked_taskloop_simd(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @parallel_masked_taskloop_simd.omp_outlined)
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define internal void @parallel_masked_taskloop_simd.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1
+// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP1]], i32 0)
+// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+// CHECK-NEXT: br i1 [[TMP3]], label %[[OMP_IF_THEN:.*]], label %[[OMP_IF_END:.*]]
+// CHECK: [[OMP_IF_THEN]]:
+// CHECK-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]])
+// CHECK-NEXT: [[TMP4:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 0, ptr @.omp_task_entry..2)
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], ptr [[TMP4]], i32 0, i32 0
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP5]], i32 0, i32 5
+// CHECK-NEXT: store i64 0, ptr [[TMP6]], align 8
+// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T]], ptr [[TMP5]], i32 0, i32 6
+// CHECK-NEXT: store i64 99, ptr [[TMP7]], align 8
+// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T]], ptr [[TMP5]], i32 0, i32 7
+// CHECK-NEXT: store i64 1, ptr [[TMP8]], align 8
+// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T]], ptr [[TMP5]], i32 0, i32 9
+// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 8, i1 false)
+// CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
+// CHECK-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP4]], i32 1, ptr [[TMP6]], ptr [[TMP7]], i64 [[TMP10]], i32 1, i32 0, i64 0, ptr null)
+// CHECK-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]])
+// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP1]])
+// CHECK-NEXT: br label %[[OMP_IF_END]]
+// CHECK: [[OMP_IF_END]]:
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define internal i32 @.omp_task_entry..2(
+// CHECK-SAME: i32 noundef [[TMP0:%.*]], ptr noalias noundef [[TMP1:%.*]]) #[[ATTR4]] {
+/...
[truncated]
✅ With the latest revision this PR passed the C/C++ code formatter.
Split into several separate patches, one for each directive.
Closing this PR. Splitting into separate patches, one for each directive.