Skip to content

Commit cd3d083

Browse files
authored
Merge branch 'main' into fix/111854
2 parents 661abfd + b35b583 commit cd3d083

File tree

18 files changed

+112
-68
lines changed

18 files changed

+112
-68
lines changed

clang/lib/Driver/ToolChains/Cuda.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -634,8 +634,8 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
634634
std::vector<StringRef> Features;
635635
getNVPTXTargetFeatures(C.getDriver(), getToolChain().getTriple(), Args,
636636
Features);
637-
for (StringRef Feature : Features)
638-
CmdArgs.append({"--feature", Args.MakeArgString(Feature)});
637+
CmdArgs.push_back(
638+
Args.MakeArgString("--plugin-opt=mattr=" + llvm::join(Features, ",")));
639639

640640
// Add paths for the default clang library path.
641641
SmallString<256> DefaultLibPath =

clang/test/Driver/cuda-cross-compiling.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,4 +104,4 @@
104104
// RUN: %clang -target nvptx64-nvidia-cuda --cuda-feature=+ptx63 -march=sm_52 -### %s 2>&1 \
105105
// RUN: | FileCheck -check-prefix=FEATURE %s
106106

107-
// FEATURE: clang-nvlink-wrapper{{.*}}"--feature" "+ptx63"
107+
// FEATURE: clang-nvlink-wrapper{{.*}}"--plugin-opt=mattr=+ptx63"

clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,7 @@ Expected<std::unique_ptr<lto::LTO>> createLTO(const ArgList &Args) {
344344
Conf.RemarksHotnessThreshold = RemarksHotnessThreshold;
345345
Conf.RemarksFormat = RemarksFormat;
346346

347-
Conf.MAttrs = {Args.getLastArgValue(OPT_feature, "").str()};
347+
Conf.MAttrs = llvm::codegen::getMAttrs();
348348
std::optional<CodeGenOptLevel> CGOptLevelOrNone =
349349
CodeGenOpt::parseLevel(Args.getLastArgValue(OPT_O, "2")[0]);
350350
assert(CGOptLevelOrNone && "Invalid optimization level");

clang/tools/clang-nvlink-wrapper/NVLinkOpts.td

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,6 @@ def arch : Separate<["--", "-"], "arch">,
4747
def : Joined<["--", "-"], "plugin-opt=mcpu=">,
4848
Flags<[HelpHidden, WrapperOnlyOption]>, Alias<arch>;
4949

50-
def feature : Separate<["--", "-"], "feature">, Flags<[WrapperOnlyOption]>,
51-
HelpText<"Specify the '+ptx' freature to use for LTO.">;
52-
5350
def g : Flag<["-"], "g">, HelpText<"Specify that this was a debug compile.">;
5451
def debug : Flag<["--"], "debug">, Alias<g>;
5552

compiler-rt/lib/lsan/lsan_common.cpp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,27 @@ struct DirectMemoryAccessor {
293293
void Init(uptr begin, uptr end) {};
294294
void *LoadPtr(uptr p) const { return *reinterpret_cast<void **>(p); }
295295
};
296+
297+
struct CopyMemoryAccessor {
298+
void Init(uptr begin, uptr end) {
299+
this->begin = begin;
300+
buffer.clear();
301+
buffer.resize(end - begin);
302+
MemCpyAccessible(buffer.data(), reinterpret_cast<void *>(begin),
303+
buffer.size());
304+
};
305+
306+
void *LoadPtr(uptr p) const {
307+
uptr offset = p - begin;
308+
CHECK_LE(offset + sizeof(void *), reinterpret_cast<uptr>(buffer.size()));
309+
return *reinterpret_cast<void **>(offset +
310+
reinterpret_cast<uptr>(buffer.data()));
311+
}
312+
313+
private:
314+
uptr begin;
315+
InternalMmapVector<char> buffer;
316+
};
296317
} // namespace
297318

298319
// Scans the memory range, looking for byte patterns that point into allocator
@@ -535,6 +556,7 @@ static void ProcessThread(tid_t os_id, uptr sp,
535556
static void ProcessThreads(SuspendedThreadsList const &suspended_threads,
536557
Frontier *frontier, tid_t caller_tid,
537558
uptr caller_sp) {
559+
InternalMmapVector<tid_t> done_threads;
538560
InternalMmapVector<uptr> registers;
539561
InternalMmapVector<Range> extra_ranges;
540562
for (uptr i = 0; i < suspended_threads.ThreadCount(); i++) {
@@ -559,6 +581,25 @@ static void ProcessThreads(SuspendedThreadsList const &suspended_threads,
559581

560582
DirectMemoryAccessor accessor;
561583
ProcessThread(os_id, sp, registers, extra_ranges, frontier, accessor);
584+
if (flags()->use_detached)
585+
done_threads.push_back(os_id);
586+
}
587+
588+
if (flags()->use_detached) {
589+
CopyMemoryAccessor accessor;
590+
InternalMmapVector<tid_t> known_threads;
591+
GetRunningThreadsLocked(&known_threads);
592+
Sort(done_threads.data(), done_threads.size());
593+
for (tid_t os_id : known_threads) {
594+
registers.clear();
595+
extra_ranges.clear();
596+
597+
uptr i = InternalLowerBound(done_threads, os_id);
598+
if (i >= done_threads.size() || done_threads[i] != os_id) {
599+
uptr sp = (os_id == caller_tid) ? caller_sp : 0;
600+
ProcessThread(os_id, sp, registers, extra_ranges, frontier, accessor);
601+
}
602+
}
562603
}
563604

564605
// Add pointers reachable from ThreadContexts

compiler-rt/lib/lsan/lsan_flags.inc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ LSAN_FLAG(bool, use_ld_allocations, true,
4141
LSAN_FLAG(bool, use_unaligned, false, "Consider unaligned pointers valid.")
4242
LSAN_FLAG(bool, use_poisoned, false,
4343
"Consider pointers found in poisoned memory to be valid.")
44+
LSAN_FLAG(bool, use_detached, false,
45+
"Scan threads even if attaching to them failed.")
4446
LSAN_FLAG(bool, log_pointers, false, "Debug logging")
4547
LSAN_FLAG(bool, log_threads, false, "Debug logging")
4648
LSAN_FLAG(int, tries, 1, "Debug option to repeat leak checking multiple times")

lld/test/MachO/objc-category-merging-minimal.s

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030

3131
############ Test merging skipped due to invalid category name ############
3232
# Modify __OBJC_$_CATEGORY_MyBaseClass_$_Category01's name to point to L_OBJC_IMAGE_INFO+3
33-
# RUN: sed -E '/^__OBJC_\$_CATEGORY_MyBaseClass_\$_Category01:/ { n; s/^[ \t]*\.quad[ \t]+l_OBJC_CLASS_NAME_$/\t.quad\tL_OBJC_IMAGE_INFO+3/}' merge_cat_minimal.s > merge_cat_minimal_bad_name.s
33+
# RUN: awk '/^__OBJC_\$_CATEGORY_MyBaseClass_\$_Category01:/ { print; getline; sub(/^[ \t]*\.quad[ \t]+l_OBJC_CLASS_NAME_$/, "\t.quad\tL_OBJC_IMAGE_INFO+3"); print; next } { print }' merge_cat_minimal.s > merge_cat_minimal_bad_name.s
3434

3535
# Assemble the modified source
3636
# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o merge_cat_minimal_bad_name.o merge_cat_minimal_bad_name.s

llvm/include/llvm-c/Disassembler.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,10 @@ int LLVMSetDisasmOptions(LLVMDisasmContextRef DC, uint64_t Options);
7979
#define LLVMDisassembler_Option_AsmPrinterVariant 4
8080
/* The option to set comment on instructions */
8181
#define LLVMDisassembler_Option_SetInstrComments 8
82-
/* The option to print latency information alongside instructions */
82+
/* The option to print latency information alongside instructions */
8383
#define LLVMDisassembler_Option_PrintLatency 16
84+
/* The option to print in color */
85+
#define LLVMDisassembler_Option_Color 32
8486

8587
/**
8688
* Dispose of a disassembler context.

llvm/lib/MC/MCDisassembler/Disassembler.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,12 @@ size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes,
277277
SmallVector<char, 64> InsnStr;
278278
raw_svector_ostream OS(InsnStr);
279279
formatted_raw_ostream FormattedOS(OS);
280+
281+
if (DC->getOptions() & LLVMDisassembler_Option_Color) {
282+
FormattedOS.enable_colors(true);
283+
IP->setUseColor(true);
284+
}
285+
280286
IP->printInst(&Inst, PC, AnnotationsStr, *DC->getSubtargetInfo(),
281287
FormattedOS);
282288

@@ -343,5 +349,10 @@ int LLVMSetDisasmOptions(LLVMDisasmContextRef DCR, uint64_t Options){
343349
DC->addOptions(LLVMDisassembler_Option_PrintLatency);
344350
Options &= ~LLVMDisassembler_Option_PrintLatency;
345351
}
352+
if (Options & LLVMDisassembler_Option_Color) {
353+
LLVMDisasmContext *DC = static_cast<LLVMDisasmContext *>(DCR);
354+
DC->addOptions(LLVMDisassembler_Option_Color);
355+
Options &= ~LLVMDisassembler_Option_Color;
356+
}
346357
return (Options == 0);
347358
}

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2447,12 +2447,26 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
24472447
};
24482448

24492449
TailFoldingStyle Style = Cost->getTailFoldingStyle();
2450-
if (Style == TailFoldingStyle::None)
2451-
CheckMinIters =
2452-
Builder.CreateICmp(P, Count, CreateStep(), "min.iters.check");
2453-
else if (VF.isScalable() &&
2454-
!isIndvarOverflowCheckKnownFalse(Cost, VF, UF) &&
2455-
Style != TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck) {
2450+
if (Style == TailFoldingStyle::None) {
2451+
Value *Step = CreateStep();
2452+
ScalarEvolution &SE = *PSE.getSE();
2453+
// TODO: Emit unconditional branch to vector preheader instead of
2454+
// conditional branch with known condition.
2455+
const SCEV *TripCountSCEV = SE.applyLoopGuards(SE.getSCEV(Count), OrigLoop);
2456+
// Check if the trip count is < the step.
2457+
if (SE.isKnownPredicate(P, TripCountSCEV, SE.getSCEV(Step))) {
2458+
// TODO: Ensure step is at most the trip count when determining max VF and
2459+
// UF, w/o tail folding.
2460+
CheckMinIters = Builder.getTrue();
2461+
} else if (!SE.isKnownPredicate(CmpInst::getInversePredicate(P),
2462+
TripCountSCEV, SE.getSCEV(Step))) {
2463+
// Generate the minimum iteration check only if we cannot prove the
2464+
// check is known to be true, or known to be false.
2465+
CheckMinIters = Builder.CreateICmp(P, Count, Step, "min.iters.check");
2466+
} // else step known to be < trip count, use CheckMinIters preset to false.
2467+
} else if (VF.isScalable() &&
2468+
!isIndvarOverflowCheckKnownFalse(Cost, VF, UF) &&
2469+
Style != TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck) {
24562470
// vscale is not necessarily a power-of-2, which means we cannot guarantee
24572471
// an overflow to zero when updating induction variables and so an
24582472
// additional overflow check is required before entering the vector loop.
@@ -2462,8 +2476,18 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
24622476
ConstantInt::get(CountTy, cast<IntegerType>(CountTy)->getMask());
24632477
Value *LHS = Builder.CreateSub(MaxUIntTripCount, Count);
24642478

2479+
Value *Step = CreateStep();
2480+
#ifndef NDEBUG
2481+
ScalarEvolution &SE = *PSE.getSE();
2482+
const SCEV *TC2OverflowSCEV = SE.applyLoopGuards(SE.getSCEV(LHS), OrigLoop);
2483+
assert(
2484+
!isIndvarOverflowCheckKnownFalse(Cost, VF * UF) &&
2485+
!SE.isKnownPredicate(CmpInst::getInversePredicate(ICmpInst::ICMP_ULT),
2486+
TC2OverflowSCEV, SE.getSCEV(Step)) &&
2487+
"unexpectedly proved overflow check to be known");
2488+
#endif
24652489
// Don't execute the vector loop if (UMax - n) < (VF * UF).
2466-
CheckMinIters = Builder.CreateICmp(ICmpInst::ICMP_ULT, LHS, CreateStep());
2490+
CheckMinIters = Builder.CreateICmp(ICmpInst::ICMP_ULT, LHS, Step);
24672491
}
24682492

24692493
// Create new preheader for vector loop.

0 commit comments

Comments
 (0)