Skip to content

Commit e06b11e

Browse files
committed
Merge remote-tracking branch 'origin/main' into pr/s32-shift
2 parents 353045d + 62a7bb0 commit e06b11e

File tree

617 files changed

+134428
-124702
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

617 files changed

+134428
-124702
lines changed

bolt/include/bolt/Profile/YAMLProfileReader.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ class YAMLProfileReader : public ProfileReaderBase {
105105
yaml::bolt::BinaryProfile YamlBP;
106106

107107
/// Map a function ID from a YAML profile to a BinaryFunction object.
108-
std::vector<BinaryFunction *> YamlProfileToFunction;
108+
DenseMap<uint32_t, BinaryFunction *> YamlProfileToFunction;
109109

110110
using FunctionSet = std::unordered_set<const BinaryFunction *>;
111111
/// To keep track of functions that have a matched profile before the profile
@@ -162,8 +162,6 @@ class YAMLProfileReader : public ProfileReaderBase {
162162
/// Update matched YAML -> BinaryFunction pair.
163163
void matchProfileToFunction(yaml::bolt::BinaryFunctionProfile &YamlBF,
164164
BinaryFunction &BF) {
165-
if (YamlBF.Id >= YamlProfileToFunction.size())
166-
YamlProfileToFunction.resize(YamlBF.Id + 1);
167165
YamlProfileToFunction[YamlBF.Id] = &BF;
168166
YamlBF.Used = true;
169167

bolt/lib/Profile/YAMLProfileReader.cpp

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -238,9 +238,7 @@ bool YAMLProfileReader::parseFunctionProfile(
238238
BB.setExecutionCount(YamlBB.ExecCount);
239239

240240
for (const yaml::bolt::CallSiteInfo &YamlCSI : YamlBB.CallSites) {
241-
BinaryFunction *Callee = YamlCSI.DestId < YamlProfileToFunction.size()
242-
? YamlProfileToFunction[YamlCSI.DestId]
243-
: nullptr;
241+
BinaryFunction *Callee = YamlProfileToFunction.lookup(YamlCSI.DestId);
244242
bool IsFunction = Callee ? true : false;
245243
MCSymbol *CalleeSymbol = nullptr;
246244
if (IsFunction)
@@ -703,7 +701,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
703701
break;
704702
}
705703
}
706-
YamlProfileToFunction.resize(YamlBP.Functions.size() + 1);
704+
YamlProfileToFunction.reserve(YamlBP.Functions.size());
707705

708706
// Computes hash for binary functions.
709707
if (opts::MatchProfileWithFunctionHash) {
@@ -756,12 +754,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
756754
NormalizeByCalls = usesEvent("branches");
757755
uint64_t NumUnused = 0;
758756
for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
759-
if (YamlBF.Id >= YamlProfileToFunction.size()) {
760-
// Such profile was ignored.
761-
++NumUnused;
762-
continue;
763-
}
764-
if (BinaryFunction *BF = YamlProfileToFunction[YamlBF.Id])
757+
if (BinaryFunction *BF = YamlProfileToFunction.lookup(YamlBF.Id))
765758
parseFunctionProfile(*BF, YamlBF);
766759
else
767760
++NumUnused;

clang/lib/Basic/Attributes.cpp

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "clang/Basic/TargetInfo.h"
1919

2020
#include "llvm/ADT/StringMap.h"
21+
#include "llvm/ADT/StringSwitch.h"
2122

2223
using namespace clang;
2324

@@ -155,26 +156,17 @@ std::string AttributeCommonInfo::getNormalizedFullName() const {
155156
normalizeName(getAttrName(), getScopeName(), getSyntax()));
156157
}
157158

158-
// Sorted list of attribute scope names
159-
static constexpr std::pair<StringRef, AttributeCommonInfo::Scope> ScopeList[] =
160-
{{"", AttributeCommonInfo::Scope::NONE},
161-
{"clang", AttributeCommonInfo::Scope::CLANG},
162-
{"gnu", AttributeCommonInfo::Scope::GNU},
163-
{"gsl", AttributeCommonInfo::Scope::GSL},
164-
{"hlsl", AttributeCommonInfo::Scope::HLSL},
165-
{"msvc", AttributeCommonInfo::Scope::MSVC},
166-
{"omp", AttributeCommonInfo::Scope::OMP},
167-
{"riscv", AttributeCommonInfo::Scope::RISCV}};
168-
169159
AttributeCommonInfo::Scope
170160
getScopeFromNormalizedScopeName(StringRef ScopeName) {
171-
auto It = std::lower_bound(
172-
std::begin(ScopeList), std::end(ScopeList), ScopeName,
173-
[](const std::pair<StringRef, AttributeCommonInfo::Scope> &Element,
174-
StringRef Value) { return Element.first < Value; });
175-
assert(It != std::end(ScopeList) && It->first == ScopeName);
176-
177-
return It->second;
161+
return llvm::StringSwitch<AttributeCommonInfo::Scope>(ScopeName)
162+
.Case("", AttributeCommonInfo::Scope::NONE)
163+
.Case("clang", AttributeCommonInfo::Scope::CLANG)
164+
.Case("gnu", AttributeCommonInfo::Scope::GNU)
165+
.Case("gsl", AttributeCommonInfo::Scope::GSL)
166+
.Case("hlsl", AttributeCommonInfo::Scope::HLSL)
167+
.Case("msvc", AttributeCommonInfo::Scope::MSVC)
168+
.Case("omp", AttributeCommonInfo::Scope::OMP)
169+
.Case("riscv", AttributeCommonInfo::Scope::RISCV);
178170
}
179171

180172
unsigned AttributeCommonInfo::calculateAttributeSpellingListIndex() const {

clang/utils/perf-training/bolt.lit.cfg

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,6 @@ config.substitutions.append(("%clang_cpp", f" {config.clang} --driver-mode=g++ "
4949
config.substitutions.append(("%clang_skip_driver", config.clang))
5050
config.substitutions.append(("%clang", config.clang))
5151
config.substitutions.append(("%test_root", config.test_exec_root))
52+
config.substitutions.append(('%cmake_generator', config.cmake_generator))
53+
config.substitutions.append(('%cmake', config.cmake_exe))
54+
config.substitutions.append(('%llvm_src_dir', config.llvm_src_dir))

clang/utils/perf-training/bolt.lit.site.cfg.in

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ config.python_exe = "@Python3_EXECUTABLE@"
1111
config.clang_obj_root = path(r"@CLANG_BINARY_DIR@")
1212
config.clang_bolt_mode = "@CLANG_BOLT@"
1313
config.clang_bolt_name = "@CLANG_BOLT_INSTRUMENTED@"
14+
config.cmake_exe = "@CMAKE_COMMAND@"
15+
config.llvm_src_dir ="@CMAKE_SOURCE_DIR@"
16+
config.cmake_generator ="@CMAKE_GENERATOR@"
1417

1518
# Let the main config do the real work.
1619
lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")

clang/utils/perf-training/lit.cfg

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,11 @@ config.test_format = lit.formats.ShTest(use_lit_shell == "0")
3434
config.substitutions.append( ('%clang_cpp_skip_driver', ' %s %s %s ' % (cc1_wrapper, config.clang, sysroot_flags)))
3535
config.substitutions.append( ('%clang_cpp', ' %s --driver-mode=g++ %s ' % (config.clang, sysroot_flags)))
3636
config.substitutions.append( ('%clang_skip_driver', ' %s %s %s ' % (cc1_wrapper, config.clang, sysroot_flags)))
37-
config.substitutions.append( ('%clang', ' %s %s ' % (config.clang, sysroot_flags) ) )
37+
config.substitutions.append( ('%clang', '%s %s ' % (config.clang, sysroot_flags) ) )
3838
config.substitutions.append( ('%test_root', config.test_exec_root ) )
39+
config.substitutions.append( ('%cmake_generator', config.cmake_generator ) )
40+
config.substitutions.append( ('%cmake', config.cmake_exe ) )
41+
config.substitutions.append( ('%llvm_src_dir', config.llvm_src_dir ) )
3942

4043
config.environment['LLVM_PROFILE_FILE'] = 'perf-training-%4m.profraw'
4144

clang/utils/perf-training/lit.site.cfg.in

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@"
88
config.test_source_root = "@CLANG_PGO_TRAINING_DATA@"
99
config.target_triple = "@LLVM_TARGET_TRIPLE@"
1010
config.python_exe = "@Python3_EXECUTABLE@"
11+
config.cmake_exe = "@CMAKE_COMMAND@"
12+
config.llvm_src_dir ="@CMAKE_SOURCE_DIR@"
13+
config.cmake_generator ="@CMAKE_GENERATOR@"
1114

1215
# Let the main config do the real work.
1316
lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/lit.cfg")
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
RUN: %cmake -G %cmake_generator -B %t -S %llvm_src_dir -DCMAKE_C_COMPILER=%clang -DCMAKE_CXX_COMPILER=%clang -DCMAKE_CXX_FLAGS="--driver-mode=g++" -DCMAKE_BUILD_TYPE=Release
2+
RUN: %cmake --build %t -v --target LLVMSupport

flang/lib/Optimizer/Transforms/CUFOpConversion.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,13 @@ struct CUFAllocOpConversion : public mlir::OpRewritePattern<cuf::AllocOp> {
337337
seqTy.getConstantArraySize());
338338
}
339339
bytes = rewriter.create<mlir::arith::MulIOp>(loc, nbElem, width);
340+
} else if (fir::isa_derived(op.getInType())) {
341+
mlir::Type structTy = typeConverter->convertType(op.getInType());
342+
std::size_t structSize = dl->getTypeSizeInBits(structTy) / 8;
343+
bytes = builder.createIntegerConstant(loc, builder.getIndexType(),
344+
structSize);
345+
} else {
346+
mlir::emitError(loc, "unsupported type in cuf.alloc\n");
340347
}
341348
mlir::func::FuncOp func =
342349
fir::runtime::getRuntimeFunc<mkRTKey(CUFMemAlloc)>(loc, builder);

flang/runtime/CUDA/kernel.cpp

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,55 @@ void RTDEF(CUFLaunchKernel)(const void *kernel, intptr_t gridX, intptr_t gridY,
2525
blockDim.x = blockX;
2626
blockDim.y = blockY;
2727
blockDim.z = blockZ;
28+
unsigned nbNegGridDim{0};
29+
if (gridX < 0) {
30+
++nbNegGridDim;
31+
}
32+
if (gridY < 0) {
33+
++nbNegGridDim;
34+
}
35+
if (gridZ < 0) {
36+
++nbNegGridDim;
37+
}
38+
if (nbNegGridDim == 1) {
39+
int maxBlocks, nbBlocks, dev, multiProcCount;
40+
cudaError_t err1, err2;
41+
nbBlocks = blockDim.x * blockDim.y * blockDim.z;
42+
cudaGetDevice(&dev);
43+
err1 = cudaDeviceGetAttribute(
44+
&multiProcCount, cudaDevAttrMultiProcessorCount, dev);
45+
err2 = cudaOccupancyMaxActiveBlocksPerMultiprocessor(
46+
&maxBlocks, kernel, nbBlocks, smem);
47+
if (err1 == cudaSuccess && err2 == cudaSuccess) {
48+
maxBlocks = multiProcCount * maxBlocks;
49+
}
50+
if (maxBlocks > 0) {
51+
if (gridDim.x > 0) {
52+
maxBlocks = maxBlocks / gridDim.x;
53+
}
54+
if (gridDim.y > 0) {
55+
maxBlocks = maxBlocks / gridDim.y;
56+
}
57+
if (gridDim.z > 0) {
58+
maxBlocks = maxBlocks / gridDim.z;
59+
}
60+
if (maxBlocks < 1) {
61+
maxBlocks = 1;
62+
}
63+
if (gridX < 0) {
64+
gridDim.x = maxBlocks;
65+
}
66+
if (gridY < 0) {
67+
gridDim.y = maxBlocks;
68+
}
69+
if (gridZ < 0) {
70+
gridDim.z = maxBlocks;
71+
}
72+
}
73+
} else if (nbNegGridDim > 1) {
74+
Fortran::runtime::Terminator terminator{__FILE__, __LINE__};
75+
terminator.Crash("Too many invalid grid dimensions");
76+
}
2877
cudaStream_t stream = 0; // TODO stream managment
2978
CUDA_REPORT_IF_ERROR(
3079
cudaLaunchKernel(kernel, gridDim, blockDim, params, smem, stream));
@@ -41,6 +90,55 @@ void RTDEF(CUFLaunchClusterKernel)(const void *kernel, intptr_t clusterX,
4190
config.blockDim.x = blockX;
4291
config.blockDim.y = blockY;
4392
config.blockDim.z = blockZ;
93+
unsigned nbNegGridDim{0};
94+
if (gridX < 0) {
95+
++nbNegGridDim;
96+
}
97+
if (gridY < 0) {
98+
++nbNegGridDim;
99+
}
100+
if (gridZ < 0) {
101+
++nbNegGridDim;
102+
}
103+
if (nbNegGridDim == 1) {
104+
int maxBlocks, nbBlocks, dev, multiProcCount;
105+
cudaError_t err1, err2;
106+
nbBlocks = config.blockDim.x * config.blockDim.y * config.blockDim.z;
107+
cudaGetDevice(&dev);
108+
err1 = cudaDeviceGetAttribute(
109+
&multiProcCount, cudaDevAttrMultiProcessorCount, dev);
110+
err2 = cudaOccupancyMaxActiveBlocksPerMultiprocessor(
111+
&maxBlocks, kernel, nbBlocks, smem);
112+
if (err1 == cudaSuccess && err2 == cudaSuccess) {
113+
maxBlocks = multiProcCount * maxBlocks;
114+
}
115+
if (maxBlocks > 0) {
116+
if (config.gridDim.x > 0) {
117+
maxBlocks = maxBlocks / config.gridDim.x;
118+
}
119+
if (config.gridDim.y > 0) {
120+
maxBlocks = maxBlocks / config.gridDim.y;
121+
}
122+
if (config.gridDim.z > 0) {
123+
maxBlocks = maxBlocks / config.gridDim.z;
124+
}
125+
if (maxBlocks < 1) {
126+
maxBlocks = 1;
127+
}
128+
if (gridX < 0) {
129+
config.gridDim.x = maxBlocks;
130+
}
131+
if (gridY < 0) {
132+
config.gridDim.y = maxBlocks;
133+
}
134+
if (gridZ < 0) {
135+
config.gridDim.z = maxBlocks;
136+
}
137+
}
138+
} else if (nbNegGridDim > 1) {
139+
Fortran::runtime::Terminator terminator{__FILE__, __LINE__};
140+
terminator.Crash("Too many invalid grid dimensions");
141+
}
44142
config.dynamicSmemBytes = smem;
45143
config.stream = 0; // TODO stream managment
46144
cudaLaunchAttribute launchAttr[1];

0 commit comments

Comments
 (0)