@@ -511,6 +511,30 @@ class OpenMPIRBuilder {
511
511
return allocaInst;
512
512
}
513
513
};
514
+
515
+ // / State used while lowering a scan reduction: the basic blocks that make
+ // / up the scan control flow, the temporary buffers and related values.
+ struct ScanInformation {
516
+ // / Dominates the body of the loop before scan directive
517
+ llvm::BasicBlock *OMPBeforeScanBlock = nullptr ;
518
+ // / Dominates the body of the loop after scan directive
519
+ llvm::BasicBlock *OMPAfterScanBlock = nullptr ;
520
+ // / Controls the flow to before or after scan blocks
521
+ llvm::BasicBlock *OMPScanDispatch = nullptr ;
522
+ // / Exit block of loop body
523
+ llvm::BasicBlock *OMPScanLoopExit = nullptr ;
524
+ // / Block before loop body where scan initializations are done
525
+ llvm::BasicBlock *OMPScanInit = nullptr ;
526
+ // / Block after loop body where scan finalizations are done
527
+ llvm::BasicBlock *OMPScanFinish = nullptr ;
528
+ // / If true, it indicates that the Input phase is being lowered; otherwise
529
+ // / the Scan phase is being lowered
530
+ bool OMPFirstScanLoop = false ;
531
+ // / Maps the private reduction variable to the pointer of the temporary
532
+ // / buffer
533
+ llvm::SmallDenseMap<llvm::Value *, llvm::Value *> ScanBuffPtrs;
+ // / NOTE(review): presumably the induction variable of the loop being
+ // / lowered - confirm against the users. Null until it is set.
534
+ llvm::Value *IV = nullptr ;
+ // / NOTE(review): presumably the `span` value that the emitScanReduction
+ // / documentation expects to be populated - confirm. Null until it is set.
535
+ llvm::Value *Span = nullptr ;
536
+ } ScanInfo;
537
+
514
538
// / Initialize the internal state, this will put structures types and
515
539
// / potentially other helpers into the underlying module. Must be called
516
540
// / before any other method and only once! This internal state includes types
@@ -750,6 +774,35 @@ class OpenMPIRBuilder {
750
774
LoopBodyGenCallbackTy BodyGenCB, Value *TripCount,
751
775
const Twine &Name = " loop" );
752
776
777
+ // / Generator for the control flow structure of OpenMP canonical loops when
778
+ // / the parent directive has an `inscan` modifier specified.
779
+ // / If the `inscan` modifier is specified, the region of the parent is
780
+ // / expected to have a `scan` directive. Based on the clauses in the
781
+ // / scan directive, the body of the loop is split into two loops: Input Loop
782
+ // / and Scan Loop. The Input Loop contains the code generated for the input
783
+ // / phase of scan and the Scan Loop the code generated for the scan phase.
784
+ // /
785
+ // / \param Loc The insert and source location description.
786
+ // / \param BodyGenCB Callback that will generate the loop body code.
787
+ // / \param Start Value of the loop counter for the first iteration.
788
+ // / \param Stop Loop counter values past this will stop the loop.
789
+ // / \param Step Loop counter increment after each iteration; negative
790
+ // / means counting down.
791
+ // / \param IsSigned Whether Start, Stop and Step are signed integers.
792
+ // / \param InclusiveStop Whether \p Stop itself is a valid value for the loop
793
+ // / counter.
794
+ // / \param ComputeIP Insertion point for instructions computing the trip
795
+ // / count. Can be used to ensure the trip count is available
796
+ // / at the outermost loop of a loop nest. If not set,
797
+ // / defaults to the preheader of the generated loop.
798
+ // / \param Name Base name used to derive BB and instruction names.
799
+ // /
800
+ // / \returns The loop infos of the Input Loop and the Scan Loop, or an error.
801
+ Expected<SmallVector<llvm::CanonicalLoopInfo *>> createCanonicalScanLoops (
802
+ const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
803
+ Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
804
+ InsertPointTy ComputeIP, const Twine &Name);
805
+
753
806
// / Calculate the trip count of a canonical loop.
754
807
// /
755
808
// / This allows specifying user-defined loop counter values using increment,
@@ -818,13 +871,16 @@ class OpenMPIRBuilder {
818
871
// / at the outermost loop of a loop nest. If not set,
819
872
// / defaults to the preheader of the generated loop.
820
873
// / \param Name Base name used to derive BB and instruction names.
874
+ // / \param InScan Whether loop has a scan reduction specified.
821
875
// /
822
876
// / \returns An object representing the created control flow structure which
823
877
// / can be used for loop-associated directives.
824
- LLVM_ABI Expected<CanonicalLoopInfo *> createCanonicalLoop (
825
- const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
826
- Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
827
- InsertPointTy ComputeIP = {}, const Twine &Name = " loop" );
878
+ LLVM_ABI Expected<CanonicalLoopInfo *>
879
+ createCanonicalLoop (const LocationDescription &Loc,
880
+ LoopBodyGenCallbackTy BodyGenCB, Value *Start,
881
+ Value *Stop, Value *Step, bool IsSigned,
882
+ bool InclusiveStop, InsertPointTy ComputeIP = {},
883
+ const Twine &Name = " loop" , bool InScan = false );
828
884
829
885
// / Collapse a loop nest into a single loop.
830
886
// /
@@ -1556,6 +1612,45 @@ class OpenMPIRBuilder {
1556
1612
ArrayRef<OpenMPIRBuilder::ReductionInfo> ReductionInfos,
1557
1613
Function *ReduceFn, AttributeList FuncAttrs);
1558
1614
1615
+ // / Creates a call to the given runtime function (presumably marked nounwind, as the name suggests - confirm).
1616
+ // / \param Callee The runtime function to call.
1617
+ // / \param Args Arguments passed to the call.
1618
+ // / \param Name Name of the call instruction (no default argument is declared).
1619
+ // /
1620
+ // / \return The runtime call instruction created.
1621
+ llvm::CallInst *emitNoUnwindRuntimeCall (llvm::FunctionCallee Callee,
1622
+ ArrayRef<llvm::Value *> Args,
1623
+ const llvm::Twine &Name);
1624
+
1625
+ // / Helper function for createCanonicalScanLoops: the Input Loop is created
1626
+ // / by \p InputLoopGen and the Scan Loop by \p ScanLoopGen.
1627
+ // / \param InputLoopGen Callback for generating the loop for input phase
1628
+ // / \param ScanLoopGen Callback for generating the loop for scan phase; it is passed a LocationDescription.
1629
+ // /
1630
+ // / \return error if any produced, else return success.
1631
+ Error emitScanBasedDirectiveIR (
1632
+ llvm::function_ref<Error()> InputLoopGen,
1633
+ llvm::function_ref<Error(LocationDescription Loc)> ScanLoopGen);
1634
+
1635
+ // / Creates the basic blocks required for scan reduction (presumably the BasicBlock members of ScanInfo - confirm).
1636
+ void createScanBBs ();
1637
+
1638
+ // / Dynamically allocates the buffer needed for scan reduction.
1639
+ // / \param AllocaIP The IP where the possibly-shared pointer of the buffer needs to be declared.
1640
+ // / \param ScanVars Scan Variables.
1641
+ // / \param ScanVarsType Types of the scan variables (presumably parallel to ScanVars - confirm).
1642
+ // / \return error if any produced, else return success.
1643
+ Error emitScanBasedDirectiveDeclsIR (InsertPointTy AllocaIP,
1644
+ ArrayRef<llvm::Value *> ScanVars,
1645
+ ArrayRef<llvm::Type *> ScanVarsType);
1646
+
1647
+ // / Copies the result back to the reduction variable.
1648
+ // / \param ReductionInfos Vector of ReductionInfo containing the ReductionOps (taken by value).
1649
+ // /
1650
+ // / \return error if any produced, else return success.
1651
+ Error emitScanBasedDirectiveFinalsIR (
1652
+ SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos);
1653
+
1559
1654
// / This function emits a helper that gathers Reduce lists from the first
1560
1655
// / lane of every active warp to lanes in the first warp.
1561
1656
// /
@@ -2639,6 +2734,41 @@ class OpenMPIRBuilder {
2639
2734
FinalizeCallbackTy FiniCB,
2640
2735
Value *Filter);
2641
2736
2737
+ // / This function performs the scan reduction of the values updated in
2738
+ // / the input phase. The reduction logic needs to be emitted between the input
2739
+ // / and scan loops returned by `createCanonicalScanLoops`. The following
2740
+ // / is the code that is generated; `buffer` and `span` are expected to be
2741
+ // / populated before executing the generated code.
2742
+ // /
2743
+ // / for (int k = 0; k != ceil(log2(span)); ++k) {
2744
+ // / i = pow(2, k)
2745
+ // / for (size cnt = last_iter; cnt >= i; --cnt)
2746
+ // / buffer[cnt] op= buffer[cnt-i];
2747
+ // / }
2748
+ // / \param Loc The insert and source location description.
2749
+ // / \param ReductionInfos Vector of ReductionInfo containing the ReductionOps (taken by value).
2750
+ // /
2751
+ // / \returns The insertion position *after* the emitted scan reduction, or an error.
2752
+ InsertPointOrErrorTy emitScanReduction (
2753
+ const LocationDescription &Loc,
2754
+ SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos);
2755
+
2756
+ // / This directive splits and directs the control flow to input phase
2757
+ // / blocks or scan phase blocks based on 1. whether input loop or scan loop
2758
+ // / is executed, 2. whether exclusive or inclusive scan is used.
2759
+ // /
2760
+ // / \param Loc The insert and source location description.
2761
+ // / \param AllocaIP The IP where the temporary buffer for scan reduction needs to be allocated.
2762
+ // / \param ScanVars Scan Variables.
2763
+ // / \param ScanVarsType Types of the scan variables (presumably parallel to ScanVars - confirm).
2764
+ // / \param IsInclusive Whether it is an inclusive or exclusive scan.
2765
+ // /
2766
+ // / \returns The insertion position *after* the scan, or an error.
2767
+ InsertPointOrErrorTy createScan (const LocationDescription &Loc,
2768
+ InsertPointTy AllocaIP,
2769
+ ArrayRef<llvm::Value *> ScanVars,
2770
+ ArrayRef<llvm::Type *> ScanVarsType,
2771
+ bool IsInclusive);
2642
2772
// / Generator for '#omp critical'
2643
2773
// /
2644
2774
// / \param Loc The insert and source location description.
0 commit comments