Skip to content

Commit c067d74

Browse files
authored
[clang] [Xteam] Change the default blocksize from 1024 to 512. (llvm#3654)
2 parents 2d26048 + 8223a70 commit c067d74

11 files changed: 470 additions and 471 deletions.

clang/include/clang/Basic/LangOptions.def

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -229,7 +229,7 @@ LANGOPT(OpenMPCUDANumSMs , 32, 0, NotCompatible, "Number of SMs for CUDA device
229229
LANGOPT(OpenMPCUDABlocksPerSM , 32, 0, NotCompatible, "Number of blocks per SM for CUDA devices.")
230230
LANGOPT(OpenMPCUDAReductionBufNum , 32, 1024, NotCompatible, "Number of the reduction records in the intermediate reduction buffer used for the teams reductions.")
231231
LANGOPT(OpenMPGPUThreadsPerTeam, 32, 256, NotCompatible, "Number of threads per team for GPUs.")
232-
LANGOPT(OpenMPTargetXteamReductionBlockSize, 32, 1024, NotCompatible, "Number of threads in a block used by cross-team reduction.")
232+
LANGOPT(OpenMPTargetXteamReductionBlockSize, 32, 512, NotCompatible, "Number of threads in a block used by cross-team reduction.")
233233
LANGOPT(OpenMPTargetDebug , 32, 0, NotCompatible, "Enable debugging in the OpenMP offloading device RTL")
234234
LANGOPT(OpenMPTargetIgnoreEnvVars , 1, 0, NotCompatible, "Generate code assuming that device related environment variables can be ignored.")
235235
LANGOPT(OpenMPTargetBigJumpLoop , 1, 1, NotCompatible, "Use big jump loop code generation technique.")

clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp

Lines changed: 19 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -107,7 +107,7 @@ int main() {
107107
// CHECK1-NEXT: [[DEFAULT_DEV:%.*]] = call i32 @omp_get_default_device()
108108
// CHECK1-NEXT: [[TEAM_PROCS:%.*]] = call i32 @ompx_get_team_procs(i32 [[DEFAULT_DEV]])
109109
// CHECK1-NEXT: [[TMP0:%.*]] = zext i32 [[TEAM_PROCS]] to i64
110-
// CHECK1-NEXT: [[TMP1:%.*]] = mul i64 2, [[TMP0]]
110+
// CHECK1-NEXT: [[TMP1:%.*]] = mul i64 4, [[TMP0]]
111111
// CHECK1-NEXT: [[INITIAL_DEVID:%.*]] = call i32 @omp_get_initial_device()
112112
// CHECK1-NEXT: [[D_TEAM_VALS_SZ:%.*]] = mul i64 4, [[TMP1]]
113113
// CHECK1-NEXT: [[D_TEAM_VALS1:%.*]] = call ptr @omp_target_alloc(i64 [[D_TEAM_VALS_SZ]], i32 [[DEFAULT_DEV]])
@@ -182,7 +182,7 @@ int main() {
182182
// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8
183183
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
184184
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
185-
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8
185+
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]]
186186
// CHECK1-NEXT: [[D_TEAM_VALS:%.*]] = alloca i32, align 4
187187
// CHECK1-NEXT: store ptr null, ptr [[D_TEAM_VALS]], align 4
188188
// CHECK1-NEXT: [[D_TEAMS_DONE_PTR:%.*]] = alloca i32, align 4
@@ -213,7 +213,7 @@ int main() {
213213
// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8
214214
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
215215
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
216-
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8
216+
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8, !nonnull [[META7]], !align [[META8]]
217217
// CHECK1-NEXT: store i32 0, ptr [[SIVAR2]], align 4
218218
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
219219
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4
@@ -310,7 +310,7 @@ int main() {
310310
// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8
311311
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
312312
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
313-
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8
313+
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8, !nonnull [[META7]], !align [[META8]]
314314
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
315315
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
316316
// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
@@ -446,7 +446,7 @@ int main() {
446446
// CHECK1-NEXT: [[DEFAULT_DEV:%.*]] = call i32 @omp_get_default_device()
447447
// CHECK1-NEXT: [[TEAM_PROCS:%.*]] = call i32 @ompx_get_team_procs(i32 [[DEFAULT_DEV]])
448448
// CHECK1-NEXT: [[TMP0:%.*]] = zext i32 [[TEAM_PROCS]] to i64
449-
// CHECK1-NEXT: [[TMP1:%.*]] = mul i64 2, [[TMP0]]
449+
// CHECK1-NEXT: [[TMP1:%.*]] = mul i64 4, [[TMP0]]
450450
// CHECK1-NEXT: [[INITIAL_DEVID:%.*]] = call i32 @omp_get_initial_device()
451451
// CHECK1-NEXT: [[D_TEAM_VALS_SZ:%.*]] = mul i64 4, [[TMP1]]
452452
// CHECK1-NEXT: [[D_TEAM_VALS1:%.*]] = call ptr @omp_target_alloc(i64 [[D_TEAM_VALS_SZ]], i32 [[DEFAULT_DEV]])
@@ -520,7 +520,7 @@ int main() {
520520
// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8
521521
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
522522
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
523-
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8
523+
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8, !nonnull [[META7]], !align [[META8]]
524524
// CHECK1-NEXT: [[D_TEAM_VALS:%.*]] = alloca i32, align 4
525525
// CHECK1-NEXT: store ptr null, ptr [[D_TEAM_VALS]], align 4
526526
// CHECK1-NEXT: [[D_TEAMS_DONE_PTR:%.*]] = alloca i32, align 4
@@ -551,7 +551,7 @@ int main() {
551551
// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8
552552
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
553553
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
554-
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8
554+
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8, !nonnull [[META7]], !align [[META8]]
555555
// CHECK1-NEXT: store i32 0, ptr [[T_VAR2]], align 4
556556
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
557557
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4
@@ -648,7 +648,7 @@ int main() {
648648
// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8
649649
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
650650
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
651-
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8
651+
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8, !nonnull [[META7]], !align [[META8]]
652652
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
653653
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
654654
// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
@@ -782,7 +782,7 @@ int main() {
782782
// CHECK3-NEXT: [[DEFAULT_DEV:%.*]] = call i32 @omp_get_default_device()
783783
// CHECK3-NEXT: [[TEAM_PROCS:%.*]] = call i32 @ompx_get_team_procs(i32 [[DEFAULT_DEV]])
784784
// CHECK3-NEXT: [[TMP0:%.*]] = zext i32 [[TEAM_PROCS]] to i64
785-
// CHECK3-NEXT: [[TMP1:%.*]] = mul i64 2, [[TMP0]]
785+
// CHECK3-NEXT: [[TMP1:%.*]] = mul i64 4, [[TMP0]]
786786
// CHECK3-NEXT: [[INITIAL_DEVID:%.*]] = call i32 @omp_get_initial_device()
787787
// CHECK3-NEXT: [[D_TEAM_VALS_SZ:%.*]] = mul i64 4, [[TMP1]]
788788
// CHECK3-NEXT: [[D_TEAM_VALS1:%.*]] = call ptr @omp_target_alloc(i64 [[D_TEAM_VALS_SZ]], i32 [[DEFAULT_DEV]])
@@ -857,7 +857,7 @@ int main() {
857857
// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4
858858
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
859859
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
860-
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4
860+
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4, !nonnull [[META8:![0-9]+]], !align [[META9:![0-9]+]]
861861
// CHECK3-NEXT: [[D_TEAM_VALS:%.*]] = alloca i32, align 4
862862
// CHECK3-NEXT: store ptr null, ptr [[D_TEAM_VALS]], align 4
863863
// CHECK3-NEXT: [[D_TEAMS_DONE_PTR:%.*]] = alloca i32, align 4
@@ -888,7 +888,7 @@ int main() {
888888
// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4
889889
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
890890
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
891-
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4
891+
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4, !nonnull [[META8]], !align [[META9]]
892892
// CHECK3-NEXT: store i32 0, ptr [[SIVAR2]], align 4
893893
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
894894
// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4
@@ -983,7 +983,7 @@ int main() {
983983
// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4
984984
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
985985
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
986-
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4
986+
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4, !nonnull [[META8]], !align [[META9]]
987987
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
988988
// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
989989
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
@@ -1117,7 +1117,7 @@ int main() {
11171117
// CHECK3-NEXT: [[DEFAULT_DEV:%.*]] = call i32 @omp_get_default_device()
11181118
// CHECK3-NEXT: [[TEAM_PROCS:%.*]] = call i32 @ompx_get_team_procs(i32 [[DEFAULT_DEV]])
11191119
// CHECK3-NEXT: [[TMP0:%.*]] = zext i32 [[TEAM_PROCS]] to i64
1120-
// CHECK3-NEXT: [[TMP1:%.*]] = mul i64 2, [[TMP0]]
1120+
// CHECK3-NEXT: [[TMP1:%.*]] = mul i64 4, [[TMP0]]
11211121
// CHECK3-NEXT: [[INITIAL_DEVID:%.*]] = call i32 @omp_get_initial_device()
11221122
// CHECK3-NEXT: [[D_TEAM_VALS_SZ:%.*]] = mul i64 4, [[TMP1]]
11231123
// CHECK3-NEXT: [[D_TEAM_VALS1:%.*]] = call ptr @omp_target_alloc(i64 [[D_TEAM_VALS_SZ]], i32 [[DEFAULT_DEV]])
@@ -1191,7 +1191,7 @@ int main() {
11911191
// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4
11921192
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
11931193
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
1194-
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4
1194+
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4, !nonnull [[META8]], !align [[META9]]
11951195
// CHECK3-NEXT: [[D_TEAM_VALS:%.*]] = alloca i32, align 4
11961196
// CHECK3-NEXT: store ptr null, ptr [[D_TEAM_VALS]], align 4
11971197
// CHECK3-NEXT: [[D_TEAMS_DONE_PTR:%.*]] = alloca i32, align 4
@@ -1222,7 +1222,7 @@ int main() {
12221222
// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4
12231223
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
12241224
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
1225-
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4
1225+
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4, !nonnull [[META8]], !align [[META9]]
12261226
// CHECK3-NEXT: store i32 0, ptr [[T_VAR2]], align 4
12271227
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
12281228
// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4
@@ -1317,7 +1317,7 @@ int main() {
13171317
// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4
13181318
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
13191319
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4
1320-
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4
1320+
// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4, !nonnull [[META8]], !align [[META9]]
13211321
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
13221322
// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
13231323
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
@@ -1447,7 +1447,7 @@ int main() {
14471447
// CHECK5-NEXT: entry:
14481448
// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8
14491449
// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8
1450-
// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8
1450+
// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8, !nonnull [[META5:![0-9]+]], !align [[META6:![0-9]+]]
14511451
// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l44.omp_outlined, ptr [[TMP0]])
14521452
// CHECK5-NEXT: ret void
14531453
//
@@ -1470,7 +1470,7 @@ int main() {
14701470
// CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
14711471
// CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
14721472
// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8
1473-
// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8
1473+
// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8, !nonnull [[META5]], !align [[META6]]
14741474
// CHECK5-NEXT: store i32 0, ptr [[SIVAR1]], align 4
14751475
// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
14761476
// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4
@@ -1560,7 +1560,7 @@ int main() {
15601560
// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
15611561
// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
15621562
// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8
1563-
// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8
1563+
// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8, !nonnull [[META5]], !align [[META6]]
15641564
// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
15651565
// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
15661566
// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8

0 commit comments

Comments (0)