@@ -110,20 +110,8 @@ int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment) {
110
110
// main thread's warp, so none of its threads can ever be active worker
111
111
// threads.
112
112
if (UseGenericStateMachine &&
113
- mapping::getThreadIdInBlock () < mapping::getMaxTeamThreads (IsSPMD)) {
113
+ mapping::getThreadIdInBlock () < mapping::getMaxTeamThreads (IsSPMD))
114
114
genericStateMachine (KernelEnvironment.Ident );
115
- } else {
116
- // Retrieve the work function just to ensure we always call
117
- // __kmpc_kernel_parallel even if a custom state machine is used.
118
- // TODO: this is not super pretty. The problem is we create the call to
119
- // __kmpc_kernel_parallel in the openmp-opt pass but while we optimize it is
120
- // not there yet. Thus, we assume we never reach it from
121
- // __kmpc_target_deinit. That allows us to remove the store in there to
122
- // ParallelRegionFn, which leads to bad results later on.
123
- ParallelRegionFnTy WorkFn = nullptr ;
124
- __kmpc_kernel_parallel (&WorkFn);
125
- ASSERT (WorkFn == nullptr , nullptr );
126
- }
127
115
128
116
return mapping::getThreadIdInBlock ();
129
117
}
@@ -140,8 +128,22 @@ void __kmpc_target_deinit() {
140
128
if (IsSPMD)
141
129
return ;
142
130
143
- // Signal the workers to exit the state machine and exit the kernel.
144
- state::ParallelRegionFn = nullptr ;
131
+ if (mapping::isInitialThreadInLevel0 (IsSPMD)) {
132
+ // Signal the workers to exit the state machine and exit the kernel.
133
+ state::ParallelRegionFn = nullptr ;
134
+ } else if (!state::getKernelEnvironment ()
135
+ .Configuration .UseGenericStateMachine ) {
136
+ // Retrieve the work function just to ensure we always call
137
+ // __kmpc_kernel_parallel even if a custom state machine is used.
138
+ // TODO: this is not super pretty. The problem is we create the call to
139
+ // __kmpc_kernel_parallel in the openmp-opt pass but while we optimize it
140
+ // is not there yet. Thus, we assume we never reach it from
141
+ // __kmpc_target_deinit. That allows us to remove the store in there to
142
+ // ParallelRegionFn, which leads to bad results later on.
143
+ ParallelRegionFnTy WorkFn = nullptr ;
144
+ __kmpc_kernel_parallel (&WorkFn);
145
+ ASSERT (WorkFn == nullptr , nullptr );
146
+ }
145
147
}
146
148
147
149
int8_t __kmpc_is_spmd_exec_mode () { return mapping::isSPMDMode (); }
0 commit comments