@@ -549,6 +549,7 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
549
549
assert (Itr != RedVarMap.end () && " Metadata not found" );
550
550
551
551
const CodeGenModule::XteamRedVarInfo &RVI = Itr->second ;
552
+ llvm::Type *RedVarType = ConvertTypeForMem (XteamVD->getType ());
552
553
553
554
assert (RVI.ArgPos + 1 < Args->size () && " Arg position beyond bounds" );
554
555
@@ -568,9 +569,9 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
568
569
// {
569
570
// RedVar += TeamVals[TeamID - 1]
570
571
// }
571
-
572
572
Address ScanStorageValGEP = Address (
573
- Builder.CreateGEP (Int32Ty, DScanStorage, GlobalGpuThreadId), Int32Ty,
573
+ Builder.CreateGEP (RedVarType, DScanStorage, GlobalGpuThreadId),
574
+ RedVarType,
574
575
getContext ().getTypeAlignInChars (
575
576
XteamVD->getType ())); // Storage[GlobalTID]
576
577
Builder.CreateStore (Builder.CreateLoad (ScanStorageValGEP),
@@ -586,10 +587,10 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
586
587
EmitBlock (IsAfterFirstTeamThenBlock);
587
588
Address PrevTeamValGEP =
588
589
Address (Builder.CreateGEP (
589
- Int32Ty , DTeamVals,
590
+ RedVarType , DTeamVals,
590
591
Builder.CreateSub (WorkGroupId,
591
592
llvm::ConstantInt::get (Int32Ty, 1 ))),
592
- Int32Ty ,
593
+ RedVarType ,
593
594
getContext ().getTypeAlignInChars (
594
595
XteamVD->getType ())); // TeamVals[TeamID - 1]
595
596
Builder.CreateStore (Builder.CreateAdd (Builder.CreateLoad (RVI.RedVarAddr ),
@@ -614,7 +615,7 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
614
615
// }
615
616
// }
616
617
617
- Builder.CreateStore (llvm::ConstantInt::get (Int32Ty , 0 ),
618
+ Builder.CreateStore (llvm::ConstantInt::get (RedVarType , 0 ),
618
619
RVI.RedVarAddr ); // RedVar = 0
619
620
llvm::Value *IsNotFirstThread = Builder.CreateICmpUGE (
620
621
GlobalGpuThreadId,
@@ -630,8 +631,8 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
630
631
GlobalGpuThreadId,
631
632
llvm::ConstantInt::get (Int32Ty, 1 )); // GlobalTID - 1
632
633
Address ScanStoragePrevValGEP = Address (
633
- Builder.CreateGEP (Int32Ty , DScanStorage, PrevGlobalGpuThreadId),
634
- Int32Ty ,
634
+ Builder.CreateGEP (RedVarType , DScanStorage, PrevGlobalGpuThreadId),
635
+ RedVarType ,
635
636
getContext ().getTypeAlignInChars (
636
637
XteamVD->getType ())); // Storage[GlobalTID - 1]
637
638
Builder.CreateStore (Builder.CreateLoad (ScanStoragePrevValGEP),
@@ -656,10 +657,10 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
656
657
EmitBlock (IsNotFirstThreadInTeamThenBlock);
657
658
Address PrevTeamValGEP =
658
659
Address (Builder.CreateGEP (
659
- Int32Ty , DTeamVals,
660
+ RedVarType , DTeamVals,
660
661
Builder.CreateSub (WorkGroupId,
661
662
llvm::ConstantInt::get (Int32Ty, 1 ))),
662
- Int32Ty ,
663
+ RedVarType ,
663
664
getContext ().getTypeAlignInChars (
664
665
XteamVD->getType ())); // TeamVals[TeamID - 1]
665
666
Builder.CreateStore (Builder.CreateAdd (Builder.CreateLoad (RVI.RedVarAddr ),
@@ -676,10 +677,10 @@ void CodeGenFunction::EmitNoLoopXteamScanPhaseTwoCode(
676
677
EmitBlock (IsAfterSecondTeamThenBlock);
677
678
Address PrevPrevTeamValGEP =
678
679
Address (Builder.CreateGEP (
679
- Int32Ty , DTeamVals,
680
+ RedVarType , DTeamVals,
680
681
Builder.CreateSub (WorkGroupId,
681
682
llvm::ConstantInt::get (Int32Ty, 2 ))),
682
- Int32Ty ,
683
+ RedVarType ,
683
684
getContext ().getTypeAlignInChars (
684
685
XteamVD->getType ())); // TeamVals[TeamID - 2]
685
686
Builder.CreateStore (
@@ -2307,12 +2308,12 @@ void CodeGenFunction::EmitForStmtWithArgs(const ForStmt &S,
2307
2308
2308
2309
llvm::Value *SegmentLoopUB = nullptr ;
2309
2310
llvm::Value *DSegmentVals = nullptr ;
2310
- llvm::Value *ThreadLevelRes = nullptr ;
2311
2311
llvm::Value *GlobalUpperBound = nullptr ;
2312
2312
const Address *RedVarAddr = nullptr ;
2313
2313
llvm::BasicBlock *ExecBB = nullptr ;
2314
2314
llvm::BasicBlock *DoneBB = nullptr ;
2315
- clang::QualType RedVarType;
2315
+ const clang::VarDecl *XteamVD;
2316
+ llvm::Type *RedVarType;
2316
2317
if (getLangOpts ().OpenMPIsTargetDevice && CGM.isXteamSegmentedScanKernel ()) {
2317
2318
// Compute Loop trip-count (N) = GlobalUB - GlobalLB + 1
2318
2319
const auto UBLValue = EmitLValue (
@@ -2368,19 +2369,19 @@ void CodeGenFunction::EmitForStmtWithArgs(const ForStmt &S,
2368
2369
Builder.CreateMul (SegmentSizeForScan, GlobalGpuThreadId),
2369
2370
BigJumpLoopIvAddr); // *iv = GlobalTID * Seg_Size
2370
2371
2371
- // Every thread loops till just before the SegmentLoopUB = (GlobaTID + 1) *
2372
- // Seg_Size
2372
+ // Every thread loops till just before the SegmentLoopUB:
2373
+ // SegmentLoopUB = (GlobalTID + 1) * Seg_Size
2373
2374
SegmentLoopUB = Builder.CreateMul (
2374
2375
SegmentSizeForScan,
2375
2376
Builder.CreateAdd (GlobalGpuThreadId,
2376
2377
llvm::ConstantInt::get (Int32Ty, 1 )));
2377
2378
2378
- auto XteamVD = *(CGM.getXteamOrderedRedVar (&S).begin ());
2379
+ XteamVD = *(CGM.getXteamOrderedRedVar (&S).begin ());
2380
+ RedVarType = ConvertTypeForMem (XteamVD->getType ());
2379
2381
const CodeGenModule::XteamRedVarMap &RedVarMap = CGM.getXteamRedVarMap (&S);
2380
2382
const CodeGenModule::XteamRedVarInfo &RVI =
2381
2383
(RedVarMap.find (XteamVD))->second ;
2382
2384
RedVarAddr = &(RVI.RedVarAddr );
2383
- RedVarType = XteamVD->getType ();
2384
2385
2385
2386
// SegmentValsAddr points to the SegmentVals array which will store the
2386
2387
// intermediate scan results computed per segment by a single thread
@@ -2520,11 +2521,12 @@ void CodeGenFunction::EmitForStmtWithArgs(const ForStmt &S,
2520
2521
if (!CGM.isXteamScanPhaseOne ) {
2521
2522
// SegmentVals contains the final scanned results computed for every
2522
2523
// element in a segment.
2523
- Address SegmentValsGEP = Address (
2524
- Builder.CreateGEP (Int32Ty, DSegmentVals,
2525
- Builder.CreateLoad (BigJumpLoopIvAddr)),
2526
- Int32Ty,
2527
- getContext ().getTypeAlignInChars (RedVarType)); // SegmentVals[*iv]
2524
+ Address SegmentValsGEP =
2525
+ Address (Builder.CreateGEP (RedVarType, DSegmentVals,
2526
+ Builder.CreateLoad (BigJumpLoopIvAddr)),
2527
+ RedVarType,
2528
+ getContext ().getTypeAlignInChars (
2529
+ XteamVD->getType ())); // SegmentVals[*iv]
2528
2530
// emit redvar = SegmentVals[omp.iv]
2529
2531
Builder.CreateStore (Builder.CreateLoad (SegmentValsGEP), *RedVarAddr);
2530
2532
}
@@ -2548,11 +2550,12 @@ void CodeGenFunction::EmitForStmtWithArgs(const ForStmt &S,
2548
2550
(CGM.isXteamRedKernel (&S) || CGM.isBigJumpLoopKernel (&S))) {
2549
2551
if (CGM.isXteamSegmentedScanKernel ()) {
2550
2552
EmitBlock (Continue.getBlock ());
2551
- Address SegmentValsGEP = Address (
2552
- Builder.CreateGEP (Int32Ty, DSegmentVals,
2553
- Builder.CreateLoad (BigJumpLoopIvAddr)),
2554
- Int32Ty,
2555
- getContext ().getTypeAlignInChars (RedVarType)); // Segment_Vals[*iv]
2553
+ Address SegmentValsGEP =
2554
+ Address (Builder.CreateGEP (RedVarType, DSegmentVals,
2555
+ Builder.CreateLoad (BigJumpLoopIvAddr)),
2556
+ RedVarType,
2557
+ getContext ().getTypeAlignInChars (
2558
+ XteamVD->getType ())); // Segment_Vals[*iv]
2556
2559
Builder.CreateStore (Builder.CreateLoad (*RedVarAddr),
2557
2560
SegmentValsGEP); // Segment_Vals[*iv] = red_var
2558
2561
llvm::Value *SegmentScanLoopInc =
0 commit comments