Skip to content

Commit 755f808

Browse files
committed
[OpenMP][Clang][DeviceRTL] Support Multiple Datatypes for Xteam Scan
Create datatype-specific entry points to the wrapped DeviceRTL functions in Xteams.cpp for additional datatypes, and generate calls to the respective entry points based on the llvm::Type of the reduction variable. Add corresponding LIT and execution tests as well. Extending the No-Loop scan to generic types will come in a separate patch. Change-Id: I7039fc1dc82d6f58a0ba99cb3a2cf4152194a498
1 parent 5b362a0 commit 755f808

File tree

8 files changed

+3957
-55
lines changed

8 files changed

+3957
-55
lines changed

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 283 additions & 22 deletions
Large diffs are not rendered by default.

clang/lib/CodeGen/CGStmt.cpp

Lines changed: 30 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -549,6 +549,7 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
549549
assert(Itr != RedVarMap.end() && "Metadata not found");
550550

551551
const CodeGenModule::XteamRedVarInfo &RVI = Itr->second;
552+
llvm::Type *RedVarType = ConvertTypeForMem(XteamVD->getType());
552553

553554
assert(RVI.ArgPos + 1 < Args->size() && "Arg position beyond bounds");
554555

@@ -568,9 +569,9 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
568569
// {
569570
// RedVar += TeamVals[TeamID - 1]
570571
// }
571-
572572
Address ScanStorageValGEP = Address(
573-
Builder.CreateGEP(Int32Ty, DScanStorage, GlobalGpuThreadId), Int32Ty,
573+
Builder.CreateGEP(RedVarType, DScanStorage, GlobalGpuThreadId),
574+
RedVarType,
574575
getContext().getTypeAlignInChars(
575576
XteamVD->getType())); // Storage[GlobalTID]
576577
Builder.CreateStore(Builder.CreateLoad(ScanStorageValGEP),
@@ -586,10 +587,10 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
586587
EmitBlock(IsAfterFirstTeamThenBlock);
587588
Address PrevTeamValGEP =
588589
Address(Builder.CreateGEP(
589-
Int32Ty, DTeamVals,
590+
RedVarType, DTeamVals,
590591
Builder.CreateSub(WorkGroupId,
591592
llvm::ConstantInt::get(Int32Ty, 1))),
592-
Int32Ty,
593+
RedVarType,
593594
getContext().getTypeAlignInChars(
594595
XteamVD->getType())); // TeamVals[TeamID - 1]
595596
Builder.CreateStore(Builder.CreateAdd(Builder.CreateLoad(RVI.RedVarAddr),
@@ -614,7 +615,7 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
614615
// }
615616
// }
616617

617-
Builder.CreateStore(llvm::ConstantInt::get(Int32Ty, 0),
618+
Builder.CreateStore(llvm::ConstantInt::get(RedVarType, 0),
618619
RVI.RedVarAddr); // RedVar = 0
619620
llvm::Value *IsNotFirstThread = Builder.CreateICmpUGE(
620621
GlobalGpuThreadId,
@@ -630,8 +631,8 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
630631
GlobalGpuThreadId,
631632
llvm::ConstantInt::get(Int32Ty, 1)); // GlobalTID - 1
632633
Address ScanStoragePrevValGEP = Address(
633-
Builder.CreateGEP(Int32Ty, DScanStorage, PrevGlobalGpuThreadId),
634-
Int32Ty,
634+
Builder.CreateGEP(RedVarType, DScanStorage, PrevGlobalGpuThreadId),
635+
RedVarType,
635636
getContext().getTypeAlignInChars(
636637
XteamVD->getType())); // Storage[GlobalTID - 1]
637638
Builder.CreateStore(Builder.CreateLoad(ScanStoragePrevValGEP),
@@ -656,10 +657,10 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
656657
EmitBlock(IsNotFirstThreadInTeamThenBlock);
657658
Address PrevTeamValGEP =
658659
Address(Builder.CreateGEP(
659-
Int32Ty, DTeamVals,
660+
RedVarType, DTeamVals,
660661
Builder.CreateSub(WorkGroupId,
661662
llvm::ConstantInt::get(Int32Ty, 1))),
662-
Int32Ty,
663+
RedVarType,
663664
getContext().getTypeAlignInChars(
664665
XteamVD->getType())); // TeamVals[TeamID - 1]
665666
Builder.CreateStore(Builder.CreateAdd(Builder.CreateLoad(RVI.RedVarAddr),
@@ -676,10 +677,10 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
676677
EmitBlock(IsAfterSecondTeamThenBlock);
677678
Address PrevPrevTeamValGEP =
678679
Address(Builder.CreateGEP(
679-
Int32Ty, DTeamVals,
680+
RedVarType, DTeamVals,
680681
Builder.CreateSub(WorkGroupId,
681682
llvm::ConstantInt::get(Int32Ty, 2))),
682-
Int32Ty,
683+
RedVarType,
683684
getContext().getTypeAlignInChars(
684685
XteamVD->getType())); // TeamVals[TeamID - 2]
685686
Builder.CreateStore(
@@ -2307,12 +2308,12 @@ void CodeGenFunction::EmitForStmtWithArgs(const ForStmt &S,
23072308

23082309
llvm::Value *SegmentLoopUB = nullptr;
23092310
llvm::Value *DSegmentVals = nullptr;
2310-
llvm::Value *ThreadLevelRes = nullptr;
23112311
llvm::Value *GlobalUpperBound = nullptr;
23122312
const Address *RedVarAddr = nullptr;
23132313
llvm::BasicBlock *ExecBB = nullptr;
23142314
llvm::BasicBlock *DoneBB = nullptr;
2315-
clang::QualType RedVarType;
2315+
const clang::VarDecl *XteamVD;
2316+
llvm::Type *RedVarType;
23162317
if (getLangOpts().OpenMPIsTargetDevice && CGM.isXteamSegmentedScanKernel()) {
23172318
// Compute Loop trip-count (N) = GlobalUB - GlobalLB + 1
23182319
const auto UBLValue = EmitLValue(
@@ -2368,19 +2369,19 @@ void CodeGenFunction::EmitForStmtWithArgs(const ForStmt &S,
23682369
Builder.CreateMul(SegmentSizeForScan, GlobalGpuThreadId),
23692370
BigJumpLoopIvAddr); // *iv = GlobalTID * Seg_Size
23702371

2371-
// Every thread loops till just before the SegmentLoopUB = (GlobaTID + 1) *
2372-
// Seg_Size
2372+
// Every thread loops till just before the SegmentLoopUB:
2373+
// SegmentLoopUB = (GlobalTID + 1) * Seg_Size
23732374
SegmentLoopUB = Builder.CreateMul(
23742375
SegmentSizeForScan,
23752376
Builder.CreateAdd(GlobalGpuThreadId,
23762377
llvm::ConstantInt::get(Int32Ty, 1)));
23772378

2378-
auto XteamVD = *(CGM.getXteamOrderedRedVar(&S).begin());
2379+
XteamVD = *(CGM.getXteamOrderedRedVar(&S).begin());
2380+
RedVarType = ConvertTypeForMem(XteamVD->getType());
23792381
const CodeGenModule::XteamRedVarMap &RedVarMap = CGM.getXteamRedVarMap(&S);
23802382
const CodeGenModule::XteamRedVarInfo &RVI =
23812383
(RedVarMap.find(XteamVD))->second;
23822384
RedVarAddr = &(RVI.RedVarAddr);
2383-
RedVarType = XteamVD->getType();
23842385

23852386
// SegmentValsAddr points to the SegmentVals array which will store the
23862387
// intermediate scan results computed per segment by a single thread
@@ -2520,11 +2521,12 @@ void CodeGenFunction::EmitForStmtWithArgs(const ForStmt &S,
25202521
if (!CGM.isXteamScanPhaseOne) {
25212522
// SegmentVals contains the final scanned results computed for every
25222523
// element in a segment.
2523-
Address SegmentValsGEP = Address(
2524-
Builder.CreateGEP(Int32Ty, DSegmentVals,
2525-
Builder.CreateLoad(BigJumpLoopIvAddr)),
2526-
Int32Ty,
2527-
getContext().getTypeAlignInChars(RedVarType)); // SegmentVals[*iv]
2524+
Address SegmentValsGEP =
2525+
Address(Builder.CreateGEP(RedVarType, DSegmentVals,
2526+
Builder.CreateLoad(BigJumpLoopIvAddr)),
2527+
RedVarType,
2528+
getContext().getTypeAlignInChars(
2529+
XteamVD->getType())); // SegmentVals[*iv]
25282530
// emit redvar = SegmentVals[omp.iv]
25292531
Builder.CreateStore(Builder.CreateLoad(SegmentValsGEP), *RedVarAddr);
25302532
}
@@ -2548,11 +2550,12 @@ void CodeGenFunction::EmitForStmtWithArgs(const ForStmt &S,
25482550
(CGM.isXteamRedKernel(&S) || CGM.isBigJumpLoopKernel(&S))) {
25492551
if (CGM.isXteamSegmentedScanKernel()) {
25502552
EmitBlock(Continue.getBlock());
2551-
Address SegmentValsGEP = Address(
2552-
Builder.CreateGEP(Int32Ty, DSegmentVals,
2553-
Builder.CreateLoad(BigJumpLoopIvAddr)),
2554-
Int32Ty,
2555-
getContext().getTypeAlignInChars(RedVarType)); // Segment_Vals[*iv]
2553+
Address SegmentValsGEP =
2554+
Address(Builder.CreateGEP(RedVarType, DSegmentVals,
2555+
Builder.CreateLoad(BigJumpLoopIvAddr)),
2556+
RedVarType,
2557+
getContext().getTypeAlignInChars(
2558+
XteamVD->getType())); // Segment_Vals[*iv]
25562559
Builder.CreateStore(Builder.CreateLoad(*RedVarAddr),
25572560
SegmentValsGEP); // Segment_Vals[*iv] = red_var
25582561
llvm::Value *SegmentScanLoopInc =

clang/lib/CodeGen/CGStmtOpenMP.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -415,10 +415,10 @@ void CodeGenFunction::InitializeXteamRedCapturedVars(
415415
llvm::Value *DScanStorageInst =
416416
Builder.CreateAlloca(RedVarType, nullptr, "d_scan_storage");
417417
Address DScanStorageAddr(
418-
DScanStorageInst, Int32Ty,
418+
DScanStorageInst, RedVarType,
419419
Context.getTypeAlignInChars(Context.UnsignedIntTy));
420420
llvm::Value *NullPtrDScanStorage =
421-
llvm::ConstantPointerNull::get(Int32Ty->getPointerTo());
421+
llvm::ConstantPointerNull::get(RedVarType->getPointerTo());
422422
Builder.CreateStore(NullPtrDScanStorage, DScanStorageAddr);
423423

424424
assert(DScanStorageInst && "Device scan storage pointer cannot be null");
@@ -428,10 +428,10 @@ void CodeGenFunction::InitializeXteamRedCapturedVars(
428428
llvm::Value *DSegmentValsInst =
429429
Builder.CreateAlloca(RedVarType, nullptr, "d_segment_vals");
430430
Address DSegmentValsAddr(
431-
DSegmentValsInst, Int32Ty,
431+
DSegmentValsInst, RedVarType,
432432
Context.getTypeAlignInChars(Context.UnsignedIntTy));
433433
llvm::Value *NullPtrDSegmentVals =
434-
llvm::ConstantPointerNull::get(Int32Ty->getPointerTo());
434+
llvm::ConstantPointerNull::get(RedVarType->getPointerTo());
435435
Builder.CreateStore(NullPtrDSegmentVals, DSegmentValsAddr);
436436

437437
assert(DSegmentValsInst && "Segment Vals Array pointer cannot be null");

0 commit comments

Comments
 (0)