Skip to content

Commit 96146a0

Browse files
committed
Merge remote-tracking branch 'origin/main' into AMX-TRANSPOSE
2 parents f2fc493 + 01d233f commit 96146a0

File tree

1,783 files changed

+32349
-14817
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,783 files changed

+32349
-14817
lines changed

.ci/generate-buildkite-pipeline-premerge

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ if [[ "${linux_projects}" != "" ]]; then
272272
artifact_paths:
273273
- 'artifacts/**/*'
274274
- '*_result.json'
275-
- 'build/test-results.xml'
275+
- 'build/test-results.*.xml'
276276
agents: ${LINUX_AGENTS}
277277
retry:
278278
automatic:
@@ -295,7 +295,7 @@ if [[ "${windows_projects}" != "" ]]; then
295295
artifact_paths:
296296
- 'artifacts/**/*'
297297
- '*_result.json'
298-
- 'build/test-results.xml'
298+
- 'build/test-results.*.xml'
299299
agents: ${WINDOWS_AGENTS}
300300
retry:
301301
automatic:

.ci/monolithic-linux.sh

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ trap show-stats EXIT
3737
projects="${1}"
3838
targets="${2}"
3939

40+
lit_args="-v --xunit-xml-output ${BUILD_DIR}/test-results.xml --use-unique-output-file-name --timeout=1200 --time-tests"
41+
4042
echo "--- cmake"
4143
pip install -q -r "${MONOREPO_ROOT}"/mlir/python/requirements.txt
4244
pip install -q -r "${MONOREPO_ROOT}"/lldb/test/requirements.txt
@@ -47,7 +49,7 @@ cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \
4749
-D LLVM_ENABLE_ASSERTIONS=ON \
4850
-D LLVM_BUILD_EXAMPLES=ON \
4951
-D COMPILER_RT_BUILD_LIBFUZZER=OFF \
50-
-D LLVM_LIT_ARGS="-v --xunit-xml-output ${BUILD_DIR}/test-results.xml --timeout=1200 --time-tests" \
52+
-D LLVM_LIT_ARGS="${lit_args}" \
5153
-D LLVM_ENABLE_LLD=ON \
5254
-D CMAKE_CXX_FLAGS=-gmlt \
5355
-D LLVM_CCACHE_BUILD=ON \
@@ -87,7 +89,8 @@ if [[ "${runtimes}" != "" ]]; then
8789
-D CMAKE_BUILD_TYPE=RelWithDebInfo \
8890
-D CMAKE_INSTALL_PREFIX="${INSTALL_DIR}" \
8991
-D LIBCXX_TEST_PARAMS="std=c++03" \
90-
-D LIBCXXABI_TEST_PARAMS="std=c++03"
92+
-D LIBCXXABI_TEST_PARAMS="std=c++03" \
93+
-D LLVM_LIT_ARGS="${lit_args}"
9194

9295
echo "--- ninja runtimes C++03"
9396

@@ -104,7 +107,8 @@ if [[ "${runtimes}" != "" ]]; then
104107
-D CMAKE_BUILD_TYPE=RelWithDebInfo \
105108
-D CMAKE_INSTALL_PREFIX="${INSTALL_DIR}" \
106109
-D LIBCXX_TEST_PARAMS="std=c++26" \
107-
-D LIBCXXABI_TEST_PARAMS="std=c++26"
110+
-D LIBCXXABI_TEST_PARAMS="std=c++26" \
111+
-D LLVM_LIT_ARGS="${lit_args}"
108112

109113
echo "--- ninja runtimes C++26"
110114

@@ -121,7 +125,8 @@ if [[ "${runtimes}" != "" ]]; then
121125
-D CMAKE_BUILD_TYPE=RelWithDebInfo \
122126
-D CMAKE_INSTALL_PREFIX="${INSTALL_DIR}" \
123127
-D LIBCXX_TEST_PARAMS="enable_modules=clang" \
124-
-D LIBCXXABI_TEST_PARAMS="enable_modules=clang"
128+
-D LIBCXXABI_TEST_PARAMS="enable_modules=clang" \
129+
-D LLVM_LIT_ARGS="${lit_args}"
125130

126131
echo "--- ninja runtimes clang modules"
127132

.ci/monolithic-windows.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \
5353
-D LLVM_ENABLE_ASSERTIONS=ON \
5454
-D LLVM_BUILD_EXAMPLES=ON \
5555
-D COMPILER_RT_BUILD_LIBFUZZER=OFF \
56-
-D LLVM_LIT_ARGS="-v --xunit-xml-output ${BUILD_DIR}/test-results.xml --timeout=1200 --time-tests" \
56+
-D LLVM_LIT_ARGS="-v --xunit-xml-output ${BUILD_DIR}/test-results.xml --use-unique-output-file-name --timeout=1200 --time-tests" \
5757
-D COMPILER_RT_BUILD_ORC=OFF \
5858
-D CMAKE_C_COMPILER_LAUNCHER=sccache \
5959
-D CMAKE_CXX_COMPILER_LAUNCHER=sccache \

.github/new-prs-labeler.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -605,6 +605,14 @@ llvm:transforms:
605605
- llvm/test/Transforms/**
606606
- llvm/unittests/Transforms/**
607607

608+
llvm:instcombine:
609+
- llvm/lib/Analysis/InstructionSimplify.cpp
610+
- llvm/lib/Transforms/InstCombine/**
611+
- llvm/include/llvm/Transforms/InstCombine/
612+
- llvm/include/llvm/Analysis/InstructionSimplify.h
613+
- llvm/test/Transforms/InstCombine/**
614+
- llvm/test/Transforms/InstSimplify/**
615+
608616
clangd:
609617
- clang-tools-extra/clangd/**
610618

bolt/include/bolt/Core/BinaryContext.h

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -723,12 +723,28 @@ class BinaryContext {
723723
/// Stats for stale profile matching:
724724
/// the total number of basic blocks in the profile
725725
uint32_t NumStaleBlocks{0};
726-
/// the number of matched basic blocks
727-
uint32_t NumMatchedBlocks{0};
726+
/// the number of exactly matched basic blocks
727+
uint32_t NumExactMatchedBlocks{0};
728+
/// the number of loosely matched basic blocks
729+
uint32_t NumLooseMatchedBlocks{0};
730+
/// the number of exactly pseudo probe matched basic blocks
731+
uint32_t NumPseudoProbeExactMatchedBlocks{0};
732+
/// the number of loosely pseudo probe matched basic blocks
733+
uint32_t NumPseudoProbeLooseMatchedBlocks{0};
734+
/// the number of call matched basic blocks
735+
uint32_t NumCallMatchedBlocks{0};
728736
/// the total count of samples in the profile
729737
uint64_t StaleSampleCount{0};
730-
/// the count of matched samples
731-
uint64_t MatchedSampleCount{0};
738+
/// the count of exactly matched samples
739+
uint64_t ExactMatchedSampleCount{0};
740+
/// the count of loosely matched samples
741+
uint64_t LooseMatchedSampleCount{0};
742+
/// the count of exactly pseudo probe matched samples
743+
uint64_t PseudoProbeExactMatchedSampleCount{0};
744+
/// the count of loosely pseudo probe matched samples
745+
uint64_t PseudoProbeLooseMatchedSampleCount{0};
746+
/// the count of call matched samples
747+
uint64_t CallMatchedSampleCount{0};
732748
/// the number of stale functions that have matching number of blocks in
733749
/// the profile
734750
uint64_t NumStaleFuncsWithEqualBlockCount{0};

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1536,6 +1536,14 @@ class MCPlusBuilder {
15361536
llvm_unreachable("not implemented");
15371537
}
15381538

1539+
/// Match function \p BF to a long veneer for absolute code. Return true if
1540+
/// the match was successful and populate \p TargetAddress with an address of
1541+
/// the function the veneer jumps to.
1542+
virtual bool matchAbsLongVeneer(const BinaryFunction &BF,
1543+
uint64_t &TargetAddress) const {
1544+
llvm_unreachable("not implemented");
1545+
}
1546+
15391547
virtual bool matchAdrpAddPair(const MCInst &Adrp, const MCInst &Add) const {
15401548
llvm_unreachable("not implemented");
15411549
return false;

bolt/include/bolt/Profile/ProfileYAMLMapping.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,9 @@ struct InlineTreeNode {
174174
uint32_t CallSiteProbe;
175175
// Index in PseudoProbeDesc.GUID, UINT32_MAX for same as previous (omitted)
176176
uint32_t GUIDIndex;
177+
// Decoded contents, ParentIndexDelta becomes absolute value.
178+
uint64_t GUID;
179+
uint64_t Hash;
177180
bool operator==(const InlineTreeNode &) const { return false; }
178181
};
179182
} // end namespace bolt

bolt/include/bolt/Profile/YAMLProfileReader.h

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
#include <unordered_set>
1515

1616
namespace llvm {
17+
class MCDecodedPseudoProbeInlineTree;
18+
1719
namespace bolt {
1820

1921
class YAMLProfileReader : public ProfileReaderBase {
@@ -43,6 +45,9 @@ class YAMLProfileReader : public ProfileReaderBase {
4345
using ProfileLookupMap =
4446
DenseMap<uint32_t, yaml::bolt::BinaryFunctionProfile *>;
4547

48+
using GUIDInlineTreeMap =
49+
std::unordered_map<uint64_t, const MCDecodedPseudoProbeInlineTree *>;
50+
4651
/// A class for matching binary functions to functions in the YAML profile.
4752
/// First, a call graph is constructed for both profiled and binary functions.
4853
/// Then functions are hashed based on the names of their callee/caller
@@ -96,6 +101,61 @@ class YAMLProfileReader : public ProfileReaderBase {
96101
YamlBFAdjacencyMap;
97102
};
98103

104+
// A class for matching inline tree nodes between profile and binary.
105+
// Provides the mapping from profile inline tree node id to a
106+
// corresponding binary MCDecodedPseudoProbeInlineTree node.
107+
//
108+
// The whole mapping process is the following:
109+
//
110+
// (profile) (binary)
111+
// | blocks ^
112+
// v |
113+
// yaml::bolt::BinaryBasicBlockProfile ~= FlowBlock
114+
// ||| probes ^ (majority vote)
115+
// v ||| BBPseudoProbeToBlock
116+
// yaml::bolt::PseudoProbeInfo MCDecodedPseudoProbe
117+
// | InlineTreeIndex ^
118+
// v | probe id
119+
// [ profile node id (uint32_t) -> MCDecodedPseudoProbeInlineTree *]
120+
// InlineTreeNodeMapTy
121+
class InlineTreeNodeMapTy {
122+
DenseMap<uint32_t, const MCDecodedPseudoProbeInlineTree *> Map;
123+
124+
void mapInlineTreeNode(uint32_t ProfileNodeIdx,
125+
const MCDecodedPseudoProbeInlineTree *BinaryNode) {
126+
auto Res = Map.try_emplace(ProfileNodeIdx, BinaryNode);
127+
assert(Res.second &&
128+
"Duplicate mapping from profile node index to binary inline tree");
129+
(void)Res;
130+
}
131+
132+
public:
133+
/// Returns matched InlineTree * for a given profile inline_tree_id.
134+
const MCDecodedPseudoProbeInlineTree *
135+
getInlineTreeNode(uint32_t ProfileInlineTreeNodeId) const {
136+
auto It = Map.find(ProfileInlineTreeNodeId);
137+
if (It == Map.end())
138+
return nullptr;
139+
return It->second;
140+
}
141+
142+
// Match up \p YamlInlineTree with binary inline tree rooted at \p Root.
143+
// Return the number of matched nodes.
144+
//
145+
// This function populates the mapping from profile inline tree node id to a
146+
// corresponding binary MCDecodedPseudoProbeInlineTree node.
147+
size_t matchInlineTrees(
148+
const MCPseudoProbeDecoder &Decoder,
149+
const std::vector<yaml::bolt::InlineTreeNode> &YamlInlineTree,
150+
const MCDecodedPseudoProbeInlineTree *Root);
151+
};
152+
153+
// Partial probe matching specification: matched inline tree and corresponding
154+
// BinaryFunctionProfile
155+
using ProbeMatchSpec =
156+
std::pair<InlineTreeNodeMapTy,
157+
std::reference_wrapper<yaml::bolt::BinaryFunctionProfile>>;
158+
99159
private:
100160
/// Adjustments for basic samples profiles (without LBR).
101161
bool NormalizeByInsnCount{false};
@@ -129,6 +189,13 @@ class YAMLProfileReader : public ProfileReaderBase {
129189
/// BinaryFunction pointers indexed by YamlBP functions.
130190
std::vector<BinaryFunction *> ProfileBFs;
131191

192+
// Pseudo probe function GUID to inline tree node
193+
GUIDInlineTreeMap TopLevelGUIDToInlineTree;
194+
195+
// Mapping from a binary function to its partial match specification
196+
// (YAML profile and its inline tree mapping to binary).
197+
DenseMap<BinaryFunction *, std::vector<ProbeMatchSpec>> BFToProbeMatchSpecs;
198+
132199
/// Populate \p Function profile with the one supplied in YAML format.
133200
bool parseFunctionProfile(BinaryFunction &Function,
134201
const yaml::bolt::BinaryFunctionProfile &YamlBF);
@@ -139,7 +206,8 @@ class YAMLProfileReader : public ProfileReaderBase {
139206

140207
/// Infer function profile from stale data (collected on older binaries).
141208
bool inferStaleProfile(BinaryFunction &Function,
142-
const yaml::bolt::BinaryFunctionProfile &YamlBF);
209+
const yaml::bolt::BinaryFunctionProfile &YamlBF,
210+
const ArrayRef<ProbeMatchSpec> ProbeMatchSpecs);
143211

144212
/// Initialize maps for profile matching.
145213
void buildNameMaps(BinaryContext &BC);
@@ -156,6 +224,10 @@ class YAMLProfileReader : public ProfileReaderBase {
156224
/// Matches functions using the call graph.
157225
size_t matchWithCallGraph(BinaryContext &BC);
158226

227+
/// Matches functions using pseudo probes.
228+
/// Populates BF->partial probe match spec map.
229+
size_t matchWithPseudoProbes(BinaryContext &BC);
230+
159231
/// Matches functions with similarly named profiled functions.
160232
size_t matchWithNameSimilarity(BinaryContext &BC);
161233

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2580,6 +2580,7 @@ struct CFISnapshot {
25802580
case MCCFIInstruction::OpNegateRAStateWithPC:
25812581
case MCCFIInstruction::OpLLVMDefAspaceCfa:
25822582
case MCCFIInstruction::OpLabel:
2583+
case MCCFIInstruction::OpValOffset:
25832584
llvm_unreachable("unsupported CFI opcode");
25842585
break;
25852586
case MCCFIInstruction::OpRememberState:
@@ -2719,6 +2720,7 @@ struct CFISnapshotDiff : public CFISnapshot {
27192720
case MCCFIInstruction::OpNegateRAStateWithPC:
27202721
case MCCFIInstruction::OpLLVMDefAspaceCfa:
27212722
case MCCFIInstruction::OpLabel:
2723+
case MCCFIInstruction::OpValOffset:
27222724
llvm_unreachable("unsupported CFI opcode");
27232725
return false;
27242726
case MCCFIInstruction::OpRememberState:
@@ -2869,6 +2871,7 @@ BinaryFunction::unwindCFIState(int32_t FromState, int32_t ToState,
28692871
case MCCFIInstruction::OpNegateRAStateWithPC:
28702872
case MCCFIInstruction::OpLLVMDefAspaceCfa:
28712873
case MCCFIInstruction::OpLabel:
2874+
case MCCFIInstruction::OpValOffset:
28722875
llvm_unreachable("unsupported CFI opcode");
28732876
break;
28742877
case MCCFIInstruction::OpGnuArgsSize:

bolt/lib/Core/Exceptions.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,7 @@ Error BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
108108
DWARFDataExtractor Data(
109109
StringRef(reinterpret_cast<const char *>(LSDASectionData.data()),
110110
LSDASectionData.size()),
111-
BC.DwCtx->getDWARFObj().isLittleEndian(),
112-
BC.DwCtx->getDWARFObj().getAddressSize());
111+
BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize());
113112
uint64_t Offset = getLSDAAddress() - LSDASectionAddress;
114113
assert(Data.isValidOffset(Offset) && "wrong LSDA address");
115114

0 commit comments

Comments
 (0)