Skip to content

Commit 9eaff37

Browse files
authored
Merge branch 'main' into cfi-valoffset
2 parents 647234b + b242ae3 commit 9eaff37

File tree

168 files changed

+2666
-340
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

168 files changed

+2666
-340
lines changed

clang/include/clang/Serialization/ASTWriter.h

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -564,7 +564,7 @@ class ASTWriter : public ASTDeserializationListener,
564564
void WriteHeaderSearch(const HeaderSearch &HS);
565565
void WritePreprocessorDetail(PreprocessingRecord &PPRec,
566566
uint64_t MacroOffsetsBase);
567-
void WriteSubmodules(Module *WritingModule, ASTContext &Context);
567+
void WriteSubmodules(Module *WritingModule, ASTContext *Context);
568568

569569
void WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag,
570570
bool isModule);
@@ -585,7 +585,7 @@ class ASTWriter : public ASTDeserializationListener,
585585
void WriteComments(ASTContext &Context);
586586
void WriteSelectors(Sema &SemaRef);
587587
void WriteReferencedSelectorsPool(Sema &SemaRef);
588-
void WriteIdentifierTable(Preprocessor &PP, IdentifierResolver &IdResolver,
588+
void WriteIdentifierTable(Preprocessor &PP, IdentifierResolver *IdResolver,
589589
bool IsModule);
590590
void WriteDeclAndTypes(ASTContext &Context);
591591
void PrepareWritingSpecialDecls(Sema &SemaRef);
@@ -642,7 +642,7 @@ class ASTWriter : public ASTDeserializationListener,
642642
void WriteDeclAbbrevs();
643643
void WriteDecl(ASTContext &Context, Decl *D);
644644

645-
ASTFileSignature WriteASTCore(Sema &SemaRef, StringRef isysroot,
645+
ASTFileSignature WriteASTCore(Sema *SemaPtr, StringRef isysroot,
646646
Module *WritingModule);
647647

648648
public:
@@ -662,10 +662,13 @@ class ASTWriter : public ASTDeserializationListener,
662662
/// include timestamps in the output file.
663663
time_t getTimestampForOutput(const FileEntry *E) const;
664664

665-
/// Write a precompiled header for the given semantic analysis.
665+
/// Write a precompiled header or a module with the AST produced by the
666+
/// \c Sema object, or a dependency scanner module with the preprocessor state
667+
/// produced by the \c Preprocessor object.
666668
///
667-
/// \param SemaRef a reference to the semantic analysis object that processed
668-
/// the AST to be written into the precompiled header.
669+
/// \param Subject The \c Sema object that processed the AST to be written, or
670+
/// in the case of a dependency scanner module the \c Preprocessor that holds
671+
/// the state.
669672
///
670673
/// \param WritingModule The module that we are writing. If null, we are
671674
/// writing a precompiled header.
@@ -676,8 +679,9 @@ class ASTWriter : public ASTDeserializationListener,
676679
///
677680
/// \return the module signature, which eventually will be a hash of
678681
/// the module but currently is merely a random 32-bit number.
679-
ASTFileSignature WriteAST(Sema &SemaRef, StringRef OutputFile,
680-
Module *WritingModule, StringRef isysroot,
682+
ASTFileSignature WriteAST(llvm::PointerUnion<Sema *, Preprocessor *> Subject,
683+
StringRef OutputFile, Module *WritingModule,
684+
StringRef isysroot,
681685
bool ShouldCacheASTInMemory = false);
682686

683687
/// Emit a token.

clang/include/clang/Serialization/ModuleFile.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,15 +62,20 @@ enum ModuleKind {
6262

6363
/// The input file info that has been loaded from an AST file.
6464
struct InputFileInfo {
65-
std::string FilenameAsRequested;
66-
std::string Filename;
65+
StringRef UnresolvedImportedFilenameAsRequested;
66+
StringRef UnresolvedImportedFilename;
67+
6768
uint64_t ContentHash;
6869
off_t StoredSize;
6970
time_t StoredTime;
7071
bool Overridden;
7172
bool Transient;
7273
bool TopLevel;
7374
bool ModuleMap;
75+
76+
bool isValid() const {
77+
return !UnresolvedImportedFilenameAsRequested.empty();
78+
}
7479
};
7580

7681
/// The input file that has been loaded from this AST file, along with

clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -120,10 +120,6 @@ struct ModuleDeps {
120120
/// additionally appear in \c FileDeps as a dependency.
121121
std::string ClangModuleMapFile;
122122

123-
/// A collection of absolute paths to files that this module directly depends
124-
/// on, not including transitive dependencies.
125-
llvm::StringSet<> FileDeps;
126-
127123
/// A collection of absolute paths to module map files that this module needs
128124
/// to know about. The ordering is significant.
129125
std::vector<std::string> ModuleMapFileDeps;
@@ -143,13 +139,25 @@ struct ModuleDeps {
143139
/// an entity from this module is used.
144140
llvm::SmallVector<Module::LinkLibrary, 2> LinkLibraries;
145141

142+
/// Invokes \c Cb for all file dependencies of this module. Each provided
143+
/// \c StringRef is only valid within the individual callback invocation.
144+
void forEachFileDep(llvm::function_ref<void(StringRef)> Cb) const;
145+
146146
/// Get (or compute) the compiler invocation that can be used to build this
147147
/// module. Does not include argv[0].
148148
const std::vector<std::string> &getBuildArguments();
149149

150150
private:
151+
friend class ModuleDepCollector;
151152
friend class ModuleDepCollectorPP;
152153

154+
/// The base directory for relative paths in \c FileDeps.
155+
std::string FileDepsBaseDir;
156+
157+
/// A collection of paths to files that this module directly depends on, not
158+
/// including transitive dependencies.
159+
std::vector<std::string> FileDeps;
160+
153161
std::variant<std::monostate, CowCompilerInvocation, std::vector<std::string>>
154162
BuildInfo;
155163
};

clang/lib/Frontend/ASTUnit.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2359,7 +2359,7 @@ bool ASTUnit::Save(StringRef File) {
23592359

23602360
static bool serializeUnit(ASTWriter &Writer, SmallVectorImpl<char> &Buffer,
23612361
Sema &S, raw_ostream &OS) {
2362-
Writer.WriteAST(S, std::string(), nullptr, "");
2362+
Writer.WriteAST(&S, std::string(), nullptr, "");
23632363

23642364
// Write the generated bitstream to "Out".
23652365
if (!Buffer.empty())

clang/lib/Headers/CMakeLists.txt

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,12 @@ set(x86_files
276276
cpuid.h
277277
)
278278

279+
set(gpu_files
280+
gpuintrin.h
281+
nvptxintrin.h
282+
amdgpuintrin.h
283+
)
284+
279285
set(windows_only_files
280286
intrin0.h
281287
intrin.h
@@ -304,6 +310,7 @@ set(files
304310
${systemz_files}
305311
${ve_files}
306312
${x86_files}
313+
${gpu_files}
307314
${webassembly_files}
308315
${windows_only_files}
309316
${utility_files}
@@ -526,6 +533,7 @@ add_header_target("systemz-resource-headers" "${systemz_files};${zos_wrapper_fil
526533
add_header_target("ve-resource-headers" "${ve_files}")
527534
add_header_target("webassembly-resource-headers" "${webassembly_files}")
528535
add_header_target("x86-resource-headers" "${x86_files}")
536+
add_header_target("gpu-resource-headers" "${gpu_files}")
529537

530538
# Other header groupings
531539
add_header_target("hlsl-resource-headers" ${hlsl_files})
@@ -712,6 +720,12 @@ install(
712720
EXCLUDE_FROM_ALL
713721
COMPONENT x86-resource-headers)
714722

723+
install(
724+
FILES ${gpu_files}
725+
DESTINATION ${header_install_dir}
726+
EXCLUDE_FROM_ALL
727+
COMPONENT gpu-resource-headers)
728+
715729
if(NOT CLANG_ENABLE_HLSL)
716730
set(EXCLUDE_HLSL EXCLUDE_FROM_ALL)
717731
endif()

clang/lib/Headers/amdgpuintrin.h

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
//===-- amdgpuintrin.h - AMDPGU intrinsic functions -----------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __AMDGPUINTRIN_H
10+
#define __AMDGPUINTRIN_H
11+
12+
#ifndef __AMDGPU__
13+
#error "This file is intended for AMDGPU targets or offloading to AMDGPU"
14+
#endif
15+
16+
#include <stdint.h>
17+
18+
#if !defined(__cplusplus)
19+
_Pragma("push_macro(\"bool\")");
20+
#define bool _Bool
21+
#endif
22+
23+
_Pragma("omp begin declare target device_type(nohost)");
24+
_Pragma("omp begin declare variant match(device = {arch(amdgcn)})");
25+
26+
// Type aliases to the address spaces used by the AMDGPU backend.
27+
#define __gpu_private __attribute__((opencl_private))
28+
#define __gpu_constant __attribute__((opencl_constant))
29+
#define __gpu_local __attribute__((opencl_local))
30+
#define __gpu_global __attribute__((opencl_global))
31+
#define __gpu_generic __attribute__((opencl_generic))
32+
33+
// Attribute to declare a function as a kernel.
34+
#define __gpu_kernel __attribute__((amdgpu_kernel, visibility("protected")))
35+
36+
// Returns the number of workgroups in the 'x' dimension of the grid.
37+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_x(void) {
38+
return __builtin_amdgcn_grid_size_x() / __builtin_amdgcn_workgroup_size_x();
39+
}
40+
41+
// Returns the number of workgroups in the 'y' dimension of the grid.
42+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_y(void) {
43+
return __builtin_amdgcn_grid_size_y() / __builtin_amdgcn_workgroup_size_y();
44+
}
45+
46+
// Returns the number of workgroups in the 'z' dimension of the grid.
47+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_z(void) {
48+
return __builtin_amdgcn_grid_size_z() / __builtin_amdgcn_workgroup_size_z();
49+
}
50+
51+
// Returns the 'x' dimension of the current AMD workgroup's id.
52+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_block_id_x(void) {
53+
return __builtin_amdgcn_workgroup_id_x();
54+
}
55+
56+
// Returns the 'y' dimension of the current AMD workgroup's id.
57+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_block_id_y(void) {
58+
return __builtin_amdgcn_workgroup_id_y();
59+
}
60+
61+
// Returns the 'z' dimension of the current AMD workgroup's id.
62+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_block_id_z(void) {
63+
return __builtin_amdgcn_workgroup_id_z();
64+
}
65+
66+
// Returns the number of workitems in the 'x' dimension.
67+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_threads_x(void) {
68+
return __builtin_amdgcn_workgroup_size_x();
69+
}
70+
71+
// Returns the number of workitems in the 'y' dimension.
72+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_threads_y(void) {
73+
return __builtin_amdgcn_workgroup_size_y();
74+
}
75+
76+
// Returns the number of workitems in the 'z' dimension.
77+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_threads_z(void) {
78+
return __builtin_amdgcn_workgroup_size_z();
79+
}
80+
81+
// Returns the 'x' dimension id of the workitem in the current AMD workgroup.
82+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_thread_id_x(void) {
83+
return __builtin_amdgcn_workitem_id_x();
84+
}
85+
86+
// Returns the 'y' dimension id of the workitem in the current AMD workgroup.
87+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_thread_id_y(void) {
88+
return __builtin_amdgcn_workitem_id_y();
89+
}
90+
91+
// Returns the 'z' dimension id of the workitem in the current AMD workgroup.
92+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_thread_id_z(void) {
93+
return __builtin_amdgcn_workitem_id_z();
94+
}
95+
96+
// Returns the size of an AMD wavefront, either 32 or 64 depending on hardware
97+
// and compilation options.
98+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_lanes(void) {
99+
return __builtin_amdgcn_wavefrontsize();
100+
}
101+
102+
// Returns the id of the thread inside of an AMD wavefront executing together.
103+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_lane_id(void) {
104+
return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
105+
}
106+
107+
// Returns the bit-mask of active threads in the current wavefront.
108+
_DEFAULT_FN_ATTRS static __inline__ uint64_t __gpu_lane_mask(void) {
109+
return __builtin_amdgcn_read_exec();
110+
}
111+
112+
// Copies the value from the first active thread in the wavefront to the rest.
113+
_DEFAULT_FN_ATTRS static __inline__ uint32_t
114+
__gpu_read_first_lane_u32(uint64_t __lane_mask, uint32_t __x) {
115+
return __builtin_amdgcn_readfirstlane(__x);
116+
}
117+
118+
// Copies the value from the first active thread in the wavefront to the rest.
119+
_DEFAULT_FN_ATTRS __inline__ uint64_t
120+
__gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x) {
121+
uint32_t __hi = (uint32_t)(__x >> 32ull);
122+
uint32_t __lo = (uint32_t)(__x & 0xFFFFFFFF);
123+
return ((uint64_t)__builtin_amdgcn_readfirstlane(__hi) << 32ull) |
124+
((uint64_t)__builtin_amdgcn_readfirstlane(__lo));
125+
}
126+
127+
// Returns a bitmask of threads in the current lane for which \p x is true.
128+
_DEFAULT_FN_ATTRS static __inline__ uint64_t __gpu_ballot(uint64_t __lane_mask,
129+
bool __x) {
130+
// The lane_mask & gives the nvptx semantics when lane_mask is a subset of
131+
// the active threads
132+
return __lane_mask & __builtin_amdgcn_ballot_w64(__x);
133+
}
134+
135+
// Waits for all the threads in the block to converge and issues a fence.
136+
_DEFAULT_FN_ATTRS static __inline__ void __gpu_sync_threads(void) {
137+
__builtin_amdgcn_s_barrier();
138+
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
139+
}
140+
141+
// Wait for all threads in the wavefront to converge, this is a noop on AMDGPU.
142+
_DEFAULT_FN_ATTRS static __inline__ void __gpu_sync_lane(uint64_t __lane_mask) {
143+
__builtin_amdgcn_wave_barrier();
144+
}
145+
146+
// Shuffles the the lanes inside the wavefront according to the given index.
147+
_DEFAULT_FN_ATTRS static __inline__ uint32_t
148+
__gpu_shuffle_idx_u32(uint64_t __lane_mask, uint32_t __idx, uint32_t __x) {
149+
return __builtin_amdgcn_ds_bpermute(__idx << 2, __x);
150+
}
151+
152+
// Shuffles the the lanes inside the wavefront according to the given index.
153+
_DEFAULT_FN_ATTRS static __inline__ uint64_t
154+
__gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x) {
155+
uint32_t __hi = (uint32_t)(__x >> 32ull);
156+
uint32_t __lo = (uint32_t)(__x & 0xFFFFFFFF);
157+
return ((uint64_t)__builtin_amdgcn_ds_bpermute(__idx << 2, __hi) << 32ull) |
158+
((uint64_t)__builtin_amdgcn_ds_bpermute(__idx << 2, __lo));
159+
}
160+
161+
// Returns true if the flat pointer points to CUDA 'shared' memory.
162+
_DEFAULT_FN_ATTRS static __inline__ bool __gpu_is_ptr_local(void *ptr) {
163+
return __builtin_amdgcn_is_shared(
164+
(void __attribute__((address_space(0))) *)((void __gpu_generic *)ptr));
165+
}
166+
167+
// Returns true if the flat pointer points to CUDA 'local' memory.
168+
_DEFAULT_FN_ATTRS static __inline__ bool __gpu_is_ptr_private(void *ptr) {
169+
return __builtin_amdgcn_is_private(
170+
(void __attribute__((address_space(0))) *)((void __gpu_generic *)ptr));
171+
}
172+
173+
// Terminates execution of the associated wavefront.
174+
_DEFAULT_FN_ATTRS [[noreturn]] static __inline__ void __gpu_exit(void) {
175+
__builtin_amdgcn_endpgm();
176+
}
177+
178+
// Suspend the thread briefly to assist the scheduler during busy loops.
179+
_DEFAULT_FN_ATTRS static __inline__ void __gpu_thread_suspend(void) {
180+
__builtin_amdgcn_s_sleep(2);
181+
}
182+
183+
_Pragma("omp end declare variant");
184+
_Pragma("omp end declare target");
185+
186+
#if !defined(__cplusplus)
187+
_Pragma("pop_macro(\"bool\")");
188+
#endif
189+
190+
#endif // __AMDGPUINTRIN_H

0 commit comments

Comments
 (0)