Skip to content

Commit 0d4b852

Browse files
authored
merge main into amd-staging (llvm#2601)
2 parents 73ec592 + ba888a0 commit 0d4b852

File tree

78 files changed

+3096
-1410
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

78 files changed

+3096
-1410
lines changed

clang/lib/Sema/SemaLookup.cpp

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1978,6 +1978,8 @@ bool LookupResult::isReachableSlow(Sema &SemaRef, NamedDecl *D) {
19781978
if (D->isModulePrivate())
19791979
return false;
19801980

1981+
Module *DeclTopModule = DeclModule->getTopLevelModule();
1982+
19811983
// [module.reach]/p1
19821984
// A translation unit U is necessarily reachable from a point P if U is a
19831985
// module interface unit on which the translation unit containing P has an
@@ -1996,17 +1998,28 @@ bool LookupResult::isReachableSlow(Sema &SemaRef, NamedDecl *D) {
19961998
//
19971999
// Here we only check for the first condition. Since we couldn't see
19982000
// DeclModule if it isn't (transitively) imported.
1999-
if (DeclModule->getTopLevelModule()->isModuleInterfaceUnit())
2001+
if (DeclTopModule->isModuleInterfaceUnit())
20002002
return true;
20012003

2002-
// [module.reach]/p2
2004+
// [module.reach]/p1,2
2005+
// A translation unit U is necessarily reachable from a point P if U is a
2006+
// module interface unit on which the translation unit containing P has an
2007+
// interface dependency, or the translation unit containing P imports U, in
2008+
// either case prior to P
2009+
//
20032010
// Additional translation units on
20042011
// which the point within the program has an interface dependency may be
20052012
// considered reachable, but it is unspecified which are and under what
20062013
// circumstances.
2007-
//
2008-
// The decision here is to treat all additional tranditional units as
2009-
// unreachable.
2014+
Module *CurrentM = SemaRef.getCurrentModule();
2015+
2016+
// Directly imported module are necessarily reachable.
2017+
// Since we can't export import a module implementation partition unit, we
2018+
// don't need to count for Exports here.
2019+
if (CurrentM && CurrentM->getTopLevelModule()->Imports.count(DeclTopModule))
2020+
return true;
2021+
2022+
// Then we treat all module implementation partition unit as unreachable.
20102023
return false;
20112024
}
20122025

clang/lib/Sema/SemaModule.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -712,19 +712,20 @@ DeclResult Sema::ActOnModuleImport(SourceLocation StartLoc,
712712
Mod->Kind == Module::ModuleKind::ModulePartitionImplementation) {
713713
Diag(ExportLoc, diag::err_export_partition_impl)
714714
<< SourceRange(ExportLoc, Path.back().getLoc());
715-
} else if (!ModuleScopes.empty() && !currentModuleIsImplementation()) {
715+
} else if (ExportLoc.isValid() &&
716+
(ModuleScopes.empty() || currentModuleIsImplementation())) {
717+
// [module.interface]p1:
718+
// An export-declaration shall inhabit a namespace scope and appear in the
719+
// purview of a module interface unit.
720+
Diag(ExportLoc, diag::err_export_not_in_module_interface);
721+
} else if (!ModuleScopes.empty()) {
716722
// Re-export the module if the imported module is exported.
717723
// Note that we don't need to add re-exported module to Imports field
718724
// since `Exports` implies the module is imported already.
719725
if (ExportLoc.isValid() || getEnclosingExportDecl(Import))
720726
getCurrentModule()->Exports.emplace_back(Mod, false);
721727
else
722728
getCurrentModule()->Imports.insert(Mod);
723-
} else if (ExportLoc.isValid()) {
724-
// [module.interface]p1:
725-
// An export-declaration shall inhabit a namespace scope and appear in the
726-
// purview of a module interface unit.
727-
Diag(ExportLoc, diag::err_export_not_in_module_interface);
728729
}
729730

730731
return Import;

clang/test/Modules/pr143788.cppm

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// RUN: rm -rf %t
2+
// RUN: mkdir -p %t
3+
// RUN: split-file %s %t
4+
//
5+
// RUN: %clang_cc1 -std=c++20 %t/M.cppm -emit-module-interface -o %t/M.pcm
6+
// RUN: %clang_cc1 -std=c++20 %t/P.cppm -emit-module-interface -o %t/P.pcm
7+
// RUN: %clang_cc1 -std=c++20 %t/I.cpp -fmodule-file=M:P=%t/P.pcm -fmodule-file=M=%t/M.pcm -fsyntax-only -verify
8+
9+
//--- H.hpp
10+
struct S{};
11+
12+
//--- M.cppm
13+
export module M;
14+
15+
16+
//--- P.cppm
17+
module;
18+
#include "H.hpp"
19+
module M:P;
20+
21+
using T = S;
22+
23+
//--- I.cpp
24+
// expected-no-diagnostics
25+
module M;
26+
import :P;
27+
28+
T f() { return {}; }

compiler-rt/test/lit.common.configured.in

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ set_default("gold_executable", "@GOLD_EXECUTABLE@")
2525
set_default("clang", "@COMPILER_RT_RESOLVED_TEST_COMPILER@")
2626
set_default("compiler_id", "@COMPILER_RT_TEST_COMPILER_ID@")
2727
set_default("python_executable", "@Python3_EXECUTABLE@")
28-
set_default("python_root_dir", "@Python3_ROOT_DIR@")
2928
set_default("compiler_rt_debug", @COMPILER_RT_DEBUG_PYBOOL@)
3029
set_default("compiler_rt_intercept_libdispatch", @COMPILER_RT_INTERCEPT_LIBDISPATCH_PYBOOL@)
3130
set_default("compiler_rt_output_dir", "@COMPILER_RT_RESOLVED_OUTPUT_DIR@")

llvm/docs/NVPTXUsage.rst

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1016,7 +1016,7 @@ Syntax:
10161016

10171017
.. code-block:: llvm
10181018
1019-
declare void @llvm.nvvm.cp.async.bulk.tensor.g2s.tile.1d(ptr addrspace(7) %dst, ptr addrspace(3) %bar, ptr %tensor_map, i32 %d0, i16 %mc, i64 %ch, i1 %flag_mc, i1 %flag_ch)
1019+
declare void @llvm.nvvm.cp.async.bulk.tensor.g2s.tile.1d(ptr addrspace(7) %dst, ptr addrspace(3) %bar, ptr %tensor_map, i32 %d0, i16 %mc, i64 %ch, i1 %flag_mc, i1 %flag_ch, i32 %flag_cta_group)
10201020
declare void @llvm.nvvm.cp.async.bulk.tensor.g2s.tile.2d(..., i32 %d0, i32 %d1, ...)
10211021
declare void @llvm.nvvm.cp.async.bulk.tensor.g2s.tile.3d(..., i32 %d0, i32 %d1, i32 %d2, ...)
10221022
declare void @llvm.nvvm.cp.async.bulk.tensor.g2s.tile.4d(..., i32 %d0, i32 %d1, i32 %d2, i32 %d3, ...)
@@ -1034,18 +1034,26 @@ source tensor is preserved at the destination. The dimension of the
10341034
tensor data ranges from 1d to 5d with the coordinates specified
10351035
by the ``i32 %d0 ... i32 %d4`` arguments.
10361036

1037-
* The last two arguments to these intrinsics are boolean flags
1038-
indicating support for cache_hint and/or multicast modifiers.
1039-
These flag arguments must be compile-time constants. The backend
1040-
looks through these flags and lowers the intrinsics appropriately.
1037+
* The last three arguments to these intrinsics are flags
1038+
indicating support for multicast, cache_hint and cta_group::1/2
1039+
modifiers. These flag arguments must be compile-time constants.
1040+
The backend looks through these flags and lowers the intrinsics
1041+
appropriately.
10411042

1042-
* The Nth argument (denoted by ``i1 flag_ch``) when set, indicates
1043+
* The argument denoted by ``i1 %flag_ch`` when set, indicates
10431044
a valid cache_hint (``i64 %ch``) and generates the ``.L2::cache_hint``
10441045
variant of the PTX instruction.
10451046

1046-
* The [N-1]th argument (denoted by ``i1 flag_mc``) when set, indicates
1047-
the presence of a multicast mask (``i16 %mc``) and generates the PTX
1048-
instruction with the ``.multicast::cluster`` modifier.
1047+
* The argument denoted by ``i1 %flag_mc`` when set, indicates
1048+
the presence of a multicast mask (``i16 %mc``) and generates
1049+
the PTX instruction with the ``.multicast::cluster`` modifier.
1050+
1051+
* The argument denoted by ``i32 %flag_cta_group`` takes values within
1052+
the range [0, 3) i.e. {0,1,2}. When the value of ``%flag_cta_group``
1053+
is not within the range, it may raise an error from the Verifier.
1054+
The default value is '0' with no cta_group modifier in the
1055+
instruction. The values of '1' and '2' lower to ``cta_group::1``
1056+
and ``cta_group::2`` variants of the PTX instruction respectively.
10491057

10501058
For more information, refer PTX ISA
10511059
`<https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-tensor>`_.
@@ -1058,7 +1066,7 @@ Syntax:
10581066

10591067
.. code-block:: llvm
10601068
1061-
declare void @llvm.nvvm.cp.async.bulk.tensor.g2s.im2col.3d(ptr addrspace(3) %dst, ptr addrspace(3) %bar, ptr %tensor_map, i32 %d0, i32 %d1, i32 %d2, i16 %im2col0, i16 %mc, i64 %ch, i1 %flag_mc, i1 %flag_ch)
1069+
declare void @llvm.nvvm.cp.async.bulk.tensor.g2s.im2col.3d(ptr addrspace(3) %dst, ptr addrspace(3) %bar, ptr %tensor_map, i32 %d0, i32 %d1, i32 %d2, i16 %im2col0, i16 %mc, i64 %ch, i1 %flag_mc, i1 %flag_ch, i32 %flag_cta_group)
10621070
declare void @llvm.nvvm.cp.async.bulk.tensor.g2s.im2col.4d(..., i32 %d0, i32 %d1, i32 %d2, i32 %d3, i16 %im2col0, i16 %im2col1, ...)
10631071
declare void @llvm.nvvm.cp.async.bulk.tensor.g2s.im2col.5d(..., i32 %d0, i32 %d1, i32 %d2, i32 %d3, i32 %d4, i16 %im2col0, i16 %im2col1, i16 %im2col2, ...)
10641072
@@ -1074,8 +1082,8 @@ are unrolled into a single dimensional column at the destination. In this
10741082
mode, the tensor has to be at least three-dimensional. Along with the tensor
10751083
coordinates, im2col offsets are also specified (denoted by
10761084
``i16 im2col0...i16 %im2col2``). The number of im2col offsets is two less
1077-
than the number of dimensions of the tensor operation. The last two arguments
1078-
to these intrinsics are boolean flags, with the same functionality as described
1085+
than the number of dimensions of the tensor operation. The last three arguments
1086+
to these intrinsics are flags, with the same functionality as described
10791087
in the ``tile`` mode intrinsics above.
10801088

10811089
For more information, refer PTX ISA

llvm/include/llvm/Analysis/VectorUtils.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,12 @@ LLVM_ABI bool isVectorIntrinsicWithStructReturnOverloadAtField(
176176
LLVM_ABI Intrinsic::ID
177177
getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI);
178178

179+
/// Returns the corresponding llvm.vector.interleaveN intrinsic for factor N.
180+
LLVM_ABI Intrinsic::ID getInterleaveIntrinsicID(unsigned Factor);
181+
182+
/// Returns the corresponding llvm.vector.deinterleaveN intrinsic for factor N.
183+
LLVM_ABI Intrinsic::ID getDeinterleaveIntrinsicID(unsigned Factor);
184+
179185
/// Given a vector and an element number, see if the scalar value is
180186
/// already around as a register, for example if it were inserted then extracted
181187
/// from the vector.

llvm/include/llvm/IR/BasicBlock.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,6 @@ class BasicBlock final : public Value, // Basic blocks are data objects also
6363
public:
6464
using InstListType = SymbolTableList<Instruction, ilist_iterator_bits<true>,
6565
ilist_parent<BasicBlock>>;
66-
/// Flag recording whether or not this block stores debug-info in the form
67-
/// of intrinsic instructions (false) or non-instruction records (true).
68-
bool IsNewDbgInfoFormat;
6966

7067
private:
7168
// Allow Function to renumber blocks.
@@ -95,12 +92,6 @@ class BasicBlock final : public Value, // Basic blocks are data objects also
9592
/// IsNewDbgInfoFormat = false.
9693
LLVM_ABI void convertFromNewDbgValues();
9794

98-
/// Ensure the block is in "old" dbg.value format (\p NewFlag == false) or
99-
/// in the new format (\p NewFlag == true), converting to the desired format
100-
/// if necessary.
101-
LLVM_ABI void setIsNewDbgInfoFormat(bool NewFlag);
102-
LLVM_ABI void setNewDbgInfoFormatFlag(bool NewFlag);
103-
10495
unsigned getNumber() const {
10596
assert(getParent() && "only basic blocks in functions have valid numbers");
10697
return Number;

llvm/include/llvm/IR/Function.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -111,11 +111,6 @@ class LLVM_ABI Function : public GlobalObject, public ilist_node<Function> {
111111
friend class SymbolTableListTraits<Function>;
112112

113113
public:
114-
/// Is this function using intrinsics to record the position of debugging
115-
/// information, or non-intrinsic records? See IsNewDbgInfoFormat in
116-
/// \ref BasicBlock.
117-
bool IsNewDbgInfoFormat;
118-
119114
/// hasLazyArguments/CheckLazyArguments - The argument list of a function is
120115
/// built on demand, so that the list isn't allocated until the first client
121116
/// needs it. The hasLazyArguments predicate returns true if the arg list
@@ -130,9 +125,6 @@ class LLVM_ABI Function : public GlobalObject, public ilist_node<Function> {
130125
/// \see BasicBlock::convertFromNewDbgValues.
131126
void convertFromNewDbgValues();
132127

133-
void setIsNewDbgInfoFormat(bool NewVal);
134-
void setNewDbgInfoFormatFlag(bool NewVal);
135-
136128
private:
137129
friend class TargetLibraryInfoImpl;
138130

@@ -760,7 +752,6 @@ class LLVM_ABI Function : public GlobalObject, public ilist_node<Function> {
760752
/// to the newly inserted BB.
761753
Function::iterator insert(Function::iterator Position, BasicBlock *BB) {
762754
Function::iterator FIt = BasicBlocks.insert(Position, BB);
763-
BB->setIsNewDbgInfoFormat(IsNewDbgInfoFormat);
764755
return FIt;
765756
}
766757

llvm/include/llvm/IR/IntrinsicsNVVM.td

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2020,20 +2020,26 @@ foreach dim = 1...5 in {
20202020
defvar num_im2col_offsets = !if(is_im2col, !add(dim, -2), 0);
20212021
defvar im2col_offsets_args = !listsplat(llvm_i16_ty, num_im2col_offsets);
20222022

2023+
defvar g2s_params = !listconcat(
2024+
[llvm_shared_cluster_ptr_ty, // dst_ptr
2025+
llvm_shared_ptr_ty, // mbarrier_ptr
2026+
llvm_ptr_ty], // tensormap_ptr
2027+
tensor_dim_args, // actual tensor dims
2028+
im2col_offsets_args, // im2col offsets
2029+
[llvm_i16_ty, // cta_mask
2030+
llvm_i64_ty]); // cache_hint
2031+
defvar g2s_flags = [llvm_i1_ty, // Flag for cta_mask
2032+
llvm_i1_ty, // Flag for cache_hint
2033+
llvm_i32_ty]; // Flag for cta_group
2034+
defvar cta_group_idx = !add(
2035+
!size(g2s_params),
2036+
!sub(!size(g2s_flags), 1));
2037+
defvar g2s_props = [IntrConvergent,
2038+
WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>,
2039+
// Allowed values for cta_group are {0,1,2} i.e [0, 3).
2040+
Range<ArgIndex<cta_group_idx>, 0, 3>];
20232041
def int_nvvm_cp_async_bulk_tensor_g2s_ # mode # _ # dim # d :
2024-
DefaultAttrsIntrinsicFlags<[],
2025-
!listconcat([llvm_shared_cluster_ptr_ty, // dst_shared_cluster_ptr
2026-
llvm_shared_ptr_ty, // mbarrier_smem_ptr
2027-
llvm_ptr_ty], // tensormap_ptr
2028-
tensor_dim_args, // actual tensor dims
2029-
im2col_offsets_args, // im2col offsets
2030-
[llvm_i16_ty, // cta_mask
2031-
llvm_i64_ty]), // cache_hint
2032-
[llvm_i1_ty, // Flag for cta_mask
2033-
llvm_i1_ty], // Flag for cache_hint
2034-
[IntrConvergent,
2035-
WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>,
2036-
NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>, NoCapture<ArgIndex<2>>]>;
2042+
DefaultAttrsIntrinsicFlags<[], g2s_params, g2s_flags, g2s_props>;
20372043

20382044
def int_nvvm_cp_async_bulk_tensor_s2g_ # mode # _ # dim # d :
20392045
DefaultAttrsIntrinsicFlags<[],

llvm/include/llvm/IR/Module.h

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -215,11 +215,6 @@ class LLVM_ABI Module {
215215
/// @name Constructors
216216
/// @{
217217
public:
218-
/// Is this Module using intrinsics to record the position of debugging
219-
/// information, or non-intrinsic records? See IsNewDbgInfoFormat in
220-
/// \ref BasicBlock.
221-
bool IsNewDbgInfoFormat;
222-
223218
/// Used when printing this module in the new debug info format; removes all
224219
/// declarations of debug intrinsics that are replaced by non-intrinsic
225220
/// records in the new format.
@@ -230,28 +225,13 @@ class LLVM_ABI Module {
230225
for (auto &F : *this) {
231226
F.convertToNewDbgValues();
232227
}
233-
IsNewDbgInfoFormat = true;
234228
}
235229

236230
/// \see BasicBlock::convertFromNewDbgValues.
237231
void convertFromNewDbgValues() {
238232
for (auto &F : *this) {
239233
F.convertFromNewDbgValues();
240234
}
241-
IsNewDbgInfoFormat = false;
242-
}
243-
244-
void setIsNewDbgInfoFormat(bool UseNewFormat) {
245-
if (UseNewFormat && !IsNewDbgInfoFormat)
246-
convertToNewDbgValues();
247-
else if (!UseNewFormat && IsNewDbgInfoFormat)
248-
convertFromNewDbgValues();
249-
}
250-
void setNewDbgInfoFormatFlag(bool NewFlag) {
251-
for (auto &F : *this) {
252-
F.setNewDbgInfoFormatFlag(NewFlag);
253-
}
254-
IsNewDbgInfoFormat = NewFlag;
255235
}
256236

257237
/// The Module constructor. Note that there is no default constructor. You

0 commit comments

Comments
 (0)