Skip to content

Commit 1217781

Browse files
authored
Merge branch 'main' into cuf_tma_load_llvmotr
2 parents 9319733 + 848d865 commit 1217781

File tree

99 files changed

+4850
-282
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

99 files changed

+4850
-282
lines changed

.ci/premerge_advisor_explain.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,8 @@ def main(
129129
# If the job succeeds and there is not an existing comment, we
130130
# should not write one to reduce noise.
131131
comments = []
132-
with open("comments", "w") as comment_file_handle:
132+
comments_file_name = f"comments-{platform.system()}-{platform.machine()}"
133+
with open(comments_file_name, "w") as comment_file_handle:
133134
json.dump(comments, comment_file_handle)
134135

135136

.github/workflows/premerge.yaml

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,9 +124,9 @@ jobs:
124124
if: ${{ always() && !startsWith(matrix.runs-on, 'depot-ubuntu-24.04-arm') }}
125125
continue-on-error: true
126126
with:
127-
name: workflow-args
127+
name: workflow-args-x86-linux
128128
path: |
129-
comments
129+
comments-Linux-x86_64
130130
131131
premerge-checks-windows:
132132
name: Build and Test Windows
@@ -185,6 +185,14 @@ jobs:
185185
path: artifacts/
186186
retention-days: 5
187187
include-hidden-files: 'true'
188+
- name: Upload Comment
189+
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
190+
if: always()
191+
continue-on-error: true
192+
with:
193+
name: workflow-args-windows
194+
path: |
195+
comments-Windows-x86_64
188196
189197
premerge-check-macos:
190198
name: MacOS Premerge Checks

clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -638,7 +638,7 @@ dependencies::initializeScanInstanceDependencyCollector(
638638
}
639639

640640
bool DependencyScanningAction::runInvocation(
641-
std::unique_ptr<CompilerInvocation> Invocation,
641+
std::string Executable, std::unique_ptr<CompilerInvocation> Invocation,
642642
IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
643643
std::shared_ptr<PCHContainerOperations> PCHContainerOps,
644644
DiagnosticConsumer *DiagConsumer) {
@@ -654,9 +654,12 @@ bool DependencyScanningAction::runInvocation(
654654
if (Scanned) {
655655
// Scanning runs once for the first -cc1 invocation in a chain of driver
656656
// jobs. For any dependent jobs, reuse the scanning result and just
657-
// update the LastCC1Arguments to correspond to the new invocation.
657+
// update the new invocation.
658658
// FIXME: to support multi-arch builds, each arch requires a separate scan
659-
setLastCC1Arguments(std::move(OriginalInvocation));
659+
if (MDC)
660+
MDC->applyDiscoveredDependencies(OriginalInvocation);
661+
Consumer.handleBuildCommand(
662+
{Executable, OriginalInvocation.getCC1CommandLine()});
660663
return true;
661664
}
662665

@@ -701,8 +704,12 @@ bool DependencyScanningAction::runInvocation(
701704
// ExecuteAction is responsible for calling finish.
702705
DiagConsumerFinished = true;
703706

704-
if (Result)
705-
setLastCC1Arguments(std::move(OriginalInvocation));
707+
if (Result) {
708+
if (MDC)
709+
MDC->applyDiscoveredDependencies(OriginalInvocation);
710+
Consumer.handleBuildCommand(
711+
{Executable, OriginalInvocation.getCC1CommandLine()});
712+
}
706713

707714
return Result;
708715
}

clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.h

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -38,38 +38,23 @@ class DependencyScanningAction {
3838
std::optional<StringRef> ModuleName = std::nullopt)
3939
: Service(Service), WorkingDirectory(WorkingDirectory),
4040
Consumer(Consumer), Controller(Controller), DepFS(std::move(DepFS)) {}
41-
bool runInvocation(std::unique_ptr<CompilerInvocation> Invocation,
41+
bool runInvocation(std::string Executable,
42+
std::unique_ptr<CompilerInvocation> Invocation,
4243
IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
4344
std::shared_ptr<PCHContainerOperations> PCHContainerOps,
4445
DiagnosticConsumer *DiagConsumer);
4546

4647
bool hasScanned() const { return Scanned; }
4748
bool hasDiagConsumerFinished() const { return DiagConsumerFinished; }
4849

49-
/// Take the cc1 arguments corresponding to the most recent invocation used
50-
/// with this action. Any modifications implied by the discovered dependencies
51-
/// will have already been applied.
52-
std::vector<std::string> takeLastCC1Arguments() {
53-
std::vector<std::string> Result;
54-
std::swap(Result, LastCC1Arguments); // Reset LastCC1Arguments to empty.
55-
return Result;
56-
}
57-
5850
private:
59-
void setLastCC1Arguments(CompilerInvocation &&CI) {
60-
if (MDC)
61-
MDC->applyDiscoveredDependencies(CI);
62-
LastCC1Arguments = CI.getCC1CommandLine();
63-
}
64-
6551
DependencyScanningService &Service;
6652
StringRef WorkingDirectory;
6753
DependencyConsumer &Consumer;
6854
DependencyActionController &Controller;
6955
IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS;
7056
std::optional<CompilerInstance> ScanInstanceStorage;
7157
std::shared_ptr<ModuleDepCollector> MDC;
72-
std::vector<std::string> LastCC1Arguments;
7358
bool Scanned = false;
7459
bool DiagConsumerFinished = false;
7560
};

clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -76,18 +76,14 @@ static bool createAndRunToolInvocation(
7676
DependencyScanningAction &Action,
7777
IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
7878
std::shared_ptr<clang::PCHContainerOperations> &PCHContainerOps,
79-
DiagnosticsEngine &Diags, DependencyConsumer &Consumer) {
79+
DiagnosticsEngine &Diags) {
8080
auto Invocation = createCompilerInvocation(CommandLine, Diags);
8181
if (!Invocation)
8282
return false;
8383

84-
if (!Action.runInvocation(std::move(Invocation), std::move(FS),
85-
PCHContainerOps, Diags.getClient()))
86-
return false;
87-
88-
std::vector<std::string> Args = Action.takeLastCC1Arguments();
89-
Consumer.handleBuildCommand({CommandLine[0], std::move(Args)});
90-
return true;
84+
return Action.runInvocation(CommandLine[0], std::move(Invocation),
85+
std::move(FS), PCHContainerOps,
86+
Diags.getClient());
9187
}
9288

9389
bool DependencyScanningWorker::scanDependencies(
@@ -112,9 +108,9 @@ bool DependencyScanningWorker::scanDependencies(
112108

113109
bool Success = false;
114110
if (CommandLine[1] == "-cc1") {
115-
Success = createAndRunToolInvocation(
116-
CommandLine, Action, FS, PCHContainerOps,
117-
*DiagEngineWithCmdAndOpts.DiagEngine, Consumer);
111+
Success =
112+
createAndRunToolInvocation(CommandLine, Action, FS, PCHContainerOps,
113+
*DiagEngineWithCmdAndOpts.DiagEngine);
118114
} else {
119115
Success = forEachDriverJob(
120116
CommandLine, *DiagEngineWithCmdAndOpts.DiagEngine, FS,
@@ -128,7 +124,7 @@ bool DependencyScanningWorker::scanDependencies(
128124
return true;
129125
}
130126

131-
// Insert -cc1 comand line options into Argv
127+
// Insert -cc1 command line options into Argv
132128
std::vector<std::string> Argv;
133129
Argv.push_back(Cmd.getExecutable());
134130
llvm::append_range(Argv, Cmd.getArguments());
@@ -139,7 +135,7 @@ bool DependencyScanningWorker::scanDependencies(
139135
// dependency scanning filesystem.
140136
return createAndRunToolInvocation(
141137
std::move(Argv), Action, FS, PCHContainerOps,
142-
*DiagEngineWithCmdAndOpts.DiagEngine, Consumer);
138+
*DiagEngineWithCmdAndOpts.DiagEngine);
143139
});
144140
}
145141

flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ static const char __ldlu_r2x2[] = "__ldlu_r2x2_";
5353
static const char __ldlu_r4x4[] = "__ldlu_r4x4_";
5454
static const char __ldlu_r8x2[] = "__ldlu_r8x2_";
5555

56+
static constexpr unsigned kTMAAlignment = 16;
57+
5658
// CUDA specific intrinsic handlers.
5759
static constexpr IntrinsicHandler cudaHandlers[]{
5860
{"__ldca_i4x4",
@@ -1505,7 +1507,7 @@ static void genTMABulkLoad(fir::FirOpBuilder &builder, mlir::Location loc,
15051507
mlir::Value size = mlir::arith::MulIOp::create(builder, loc, nelem, eleSize);
15061508
auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(builder.getContext());
15071509
barrier = builder.createConvert(loc, llvmPtrTy, barrier);
1508-
setAlignment(dst, 16);
1510+
setAlignment(dst, kTMAAlignment);
15091511
dst = builder.createConvert(loc, llvmPtrTy, dst);
15101512
src = builder.createConvert(loc, llvmPtrTy, src);
15111513
mlir::NVVM::InlinePtxOp::create(
@@ -1609,6 +1611,7 @@ static void genTMABulkStore(fir::FirOpBuilder &builder, mlir::Location loc,
16091611
mlir::Value src, mlir::Value dst, mlir::Value count,
16101612
mlir::Value eleSize) {
16111613
mlir::Value size = mlir::arith::MulIOp::create(builder, loc, eleSize, count);
1614+
setAlignment(src, kTMAAlignment);
16121615
src = convertPtrToNVVMSpace(builder, loc, src,
16131616
mlir::NVVM::NVVMMemorySpace::Shared);
16141617
dst = convertPtrToNVVMSpace(builder, loc, dst,

flang/test/Lower/CUDA/cuda-device-proc.cuf

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -675,6 +675,7 @@ attributes(global) subroutine test_tma_bulk_store_c4(c, n)
675675
end subroutine
676676

677677
! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_c4
678+
! CHECK: cuf.shared_memory !fir.array<1024xcomplex<f32>> align 16 {bindc_name = "tmpa", uniq_name = "_QFtest_tma_bulk_store_c4Etmpa"} -> !fir.ref<!fir.array<1024xcomplex<f32>>>
678679
! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3>
679680
! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group;"
680681
! CHECK: nvvm.cp.async.bulk.wait_group 0
@@ -688,6 +689,7 @@ attributes(global) subroutine test_tma_bulk_store_c8(c, n)
688689
end subroutine
689690

690691
! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_c8
692+
! CHECK: cuf.shared_memory !fir.array<1024xcomplex<f64>> align 16 {bindc_name = "tmpa", uniq_name = "_QFtest_tma_bulk_store_c8Etmpa"} -> !fir.ref<!fir.array<1024xcomplex<f64>>>
691693
! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3>
692694
! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group;"
693695
! CHECK: nvvm.cp.async.bulk.wait_group 0
@@ -701,6 +703,7 @@ attributes(global) subroutine test_tma_bulk_store_i4(c, n)
701703
end subroutine
702704

703705
! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_i4
706+
! CHECK: cuf.shared_memory !fir.array<1024xi32> align 16 {bindc_name = "tmpa", uniq_name = "_QFtest_tma_bulk_store_i4Etmpa"} -> !fir.ref<!fir.array<1024xi32>>
704707
! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3>
705708
! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group;"
706709
! CHECK: nvvm.cp.async.bulk.wait_group 0
@@ -714,6 +717,7 @@ attributes(global) subroutine test_tma_bulk_store_i8(c, n)
714717
end subroutine
715718

716719
! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_i8
720+
! CHECK: cuf.shared_memory !fir.array<1024xi64> align 16 {bindc_name = "tmpa", uniq_name = "_QFtest_tma_bulk_store_i8Etmpa"} -> !fir.ref<!fir.array<1024xi64>>
717721
! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3>
718722
! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group;"
719723
! CHECK: nvvm.cp.async.bulk.wait_group 0
@@ -728,6 +732,7 @@ attributes(global) subroutine test_tma_bulk_store_r2(c, n)
728732
end subroutine
729733

730734
! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_r2
735+
! CHECK: cuf.shared_memory !fir.array<1024xf16> align 16 {bindc_name = "tmpa", uniq_name = "_QFtest_tma_bulk_store_r2Etmpa"} -> !fir.ref<!fir.array<1024xf16>>
731736
! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3>
732737
! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group;"
733738
! CHECK: nvvm.cp.async.bulk.wait_group 0
@@ -741,6 +746,7 @@ attributes(global) subroutine test_tma_bulk_store_r4(c, n)
741746
end subroutine
742747

743748
! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_r4
749+
! CHECK: cuf.shared_memory !fir.array<1024xf32> align 16 {bindc_name = "tmpa", uniq_name = "_QFtest_tma_bulk_store_r4Etmpa"} -> !fir.ref<!fir.array<1024xf32>>
744750
! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3>
745751
! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group;"
746752
! CHECK: nvvm.cp.async.bulk.wait_group 0
@@ -754,6 +760,7 @@ attributes(global) subroutine test_tma_bulk_store_r8(c, n)
754760
end subroutine
755761

756762
! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_r8
763+
! CHECK: cuf.shared_memory !fir.array<1024xf64> align 16 {bindc_name = "tmpa", uniq_name = "_QFtest_tma_bulk_store_r8Etmpa"} -> !fir.ref<!fir.array<1024xf64>>
757764
! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3>
758765
! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group;"
759766
! CHECK: nvvm.cp.async.bulk.wait_group 0

flang/test/Lower/OpenMP/Todo/reduction-task.f90

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,5 @@ subroutine reduction_task()
88

99
!$omp parallel reduction(task, +:i)
1010
i = i + 1
11-
!$omp end parallel
11+
!$omp end parallel
1212
end subroutine reduction_task

flang/test/Lower/OpenMP/allocatable-array-bounds.f90

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747
subroutine read_write_section()
4848
integer, allocatable :: sp_read(:)
4949
integer, allocatable :: sp_write(:)
50-
allocate(sp_read(10))
50+
allocate(sp_read(10))
5151
allocate(sp_write(10))
5252
sp_write = (/0,0,0,0,0,0,0,0,0,0/)
5353
sp_read = (/1,2,3,4,5,6,7,8,9,10/)

flang/test/Lower/OpenMP/allocatable-map.f90

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
!HLFIRDIALECT: %[[POINTER_MAP:.*]] = omp.map.info var_ptr(%[[POINTER]]#1 : !fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.box<!fir.ptr<i32>>) map_clauses(always, to) capture(ByRef) members(%[[POINTER_MAP_MEMBER]] : [0] : !fir.llvm_ptr<!fir.ref<i32>>) -> !fir.ref<!fir.box<!fir.ptr<i32>>> {name = "point"}
77
!HLFIRDIALECT: omp.target map_entries(%[[POINTER_MAP]] -> {{.*}}, %[[POINTER_MAP_MEMBER]] -> {{.*}} : !fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.llvm_ptr<!fir.ref<i32>>) {
88
subroutine pointer_routine()
9-
integer, pointer :: point
9+
integer, pointer :: point
1010
!$omp target map(tofrom:point)
1111
point = 1
1212
!$omp end target

0 commit comments

Comments
 (0)