Skip to content

Commit 71909ba

Browse files
committed
Merge branch 'main' into review/benchmarks-refactor-sequence-containers
2 parents 67764e1 + d35d7f4 commit 71909ba

File tree

9 files changed

+133
-40
lines changed

9 files changed

+133
-40
lines changed

.github/workflows/build-ci-container.yml

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,19 +36,22 @@ jobs:
3636
tag=`date +%s`
3737
container_name="ghcr.io/$GITHUB_REPOSITORY_OWNER/ci-ubuntu-22.04"
3838
echo "container-name=$container_name" >> $GITHUB_OUTPUT
39+
echo "container-name-agent=$container_name-agent" >> $GITHUB_OUTPUT
3940
echo "container-name-tag=$container_name:$tag" >> $GITHUB_OUTPUT
41+
echo "container-name-agent-tag=$container_name-agent:$tag" >> $GITHUB_OUTPUT
4042
echo "container-filename=$(echo $container_name:$tag | sed -e 's/\//-/g' -e 's/:/-/g').tar" >> $GITHUB_OUTPUT
4143
- name: Build container
4244
working-directory: ./.github/workflows/containers/github-action-ci/
4345
run: |
44-
podman build -t ${{ steps.vars.outputs.container-name-tag }} .
46+
podman build --target ci-container -t ${{ steps.vars.outputs.container-name-tag }} .
47+
podman build --target ci-container-agent -t ${{ steps.vars.outputs.container-name-agent-tag }} .
4548
4649
# Save the container so we have it in case the push fails. This also
4750
# allows us to separate the push step into a different job so we can
4851
# maintain minimal permissions while building the container.
4952
- name: Save container image
5053
run: |
51-
podman save ${{ steps.vars.outputs.container-name-tag }} > ${{ steps.vars.outputs.container-filename }}
54+
podman save ${{ steps.vars.outputs.container-name-tag }} ${{ steps.vars.outputs.container-name-agent-tag }} > ${{ steps.vars.outputs.container-filename }}
5255
5356
- name: Upload container image
5457
uses: actions/upload-artifact@v4
@@ -86,3 +89,7 @@ jobs:
8689
podman login -u ${{ github.actor }} -p $GITHUB_TOKEN ghcr.io
8790
podman push ${{ needs.build-ci-container.outputs.container-name-tag }}
8891
podman push ${{ needs.build-ci-container.outputs.container-name }}:latest
92+
93+
podman tag ${{ needs.build-ci-container.outputs.container-name-agent-tag }} ${{ needs.build-ci-container.outputs.container-name-agent }}:latest
94+
podman push ${{ needs.build-ci-container.outputs.container-name-agent-tag }}
95+
podman push ${{ needs.build-ci-container.outputs.container-name-agent }}:latest

.github/workflows/containers/github-action-ci/Dockerfile

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ RUN apt-get update && \
1313
ninja-build \
1414
python3 \
1515
git \
16-
curl
16+
curl \
17+
zlib1g-dev
1718

1819
RUN curl -O -L https://github.com/llvm/llvm-project/archive/refs/tags/llvmorg-$LLVM_VERSION.tar.gz && tar -xf llvmorg-$LLVM_VERSION.tar.gz
1920

@@ -38,7 +39,7 @@ RUN cmake -B ./build -G Ninja ./llvm \
3839

3940
RUN ninja -C ./build stage2-clang-bolt stage2-install-distribution && ninja -C ./build install-distribution
4041

41-
FROM base
42+
FROM base as ci-container
4243

4344
COPY --from=stage1-toolchain $LLVM_SYSROOT $LLVM_SYSROOT
4445

@@ -91,4 +92,15 @@ RUN adduser gha sudo
9192
RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
9293

9394
USER gha
95+
WORKDIR /home/gha
96+
97+
FROM ci-container as ci-container-agent
98+
99+
ENV GITHUB_RUNNER_VERSION=2.321.0
100+
101+
RUN mkdir actions-runner && \
102+
cd actions-runner && \
103+
curl -O -L https://github.com/actions/runner/releases/download/v$GITHUB_RUNNER_VERSION/actions-runner-linux-x64-$GITHUB_RUNNER_VERSION.tar.gz && \
104+
tar xzf ./actions-runner-linux-x64-$GITHUB_RUNNER_VERSION.tar.gz && \
105+
rm ./actions-runner-linux-x64-$GITHUB_RUNNER_VERSION.tar.gz
94106

libcxx/include/future

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1840,7 +1840,7 @@ class _LIBCPP_HIDDEN __async_func {
18401840
tuple<_Fp, _Args...> __f_;
18411841

18421842
public:
1843-
using _Rp = __invoke_result_t<_Fp, _Args...>;
1843+
using _Rp _LIBCPP_NODEBUG = __invoke_result_t<_Fp, _Args...>;
18441844

18451845
_LIBCPP_HIDE_FROM_ABI explicit __async_func(_Fp&& __f, _Args&&... __args)
18461846
: __f_(std::move(__f), std::move(__args)...) {}

llvm/Maintainers.md

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -333,10 +333,10 @@ [email protected] (email), [kuhar](https://github.com/kuhar) (GitHub)
333333
Peter Collingbourne \
334334
[email protected] (email), [pcc](https://github.com/pcc) (GitHub)
335335

336-
#### CMake and library layering
336+
#### CMake
337337

338-
Chandler Carruth \
339-
[email protected], chandlerc@google.com (email), [chandlerc](https://github.com/chandlerc) (GitHub)
338+
Petr Hosek \
339+
phosek@google.com (email), [petrhosek](https://github.com/petrhosek) (GitHub)
340340

341341
#### Debug info and DWARF
342342

@@ -356,6 +356,11 @@ [email protected] (email), [echristo](https://github.com/echristo) (GitHub)
356356
Teresa Johnson \
357357
[email protected] (email), [teresajohnson](https://github.com/teresajohnson) (GitHub)
358358

359+
#### Library layering
360+
361+
Takumi Nakamura \
362+
[email protected] (email), [chapuni](https://github.com/chapuni) (GitHub)
363+
359364
#### MCJIT, Orc, RuntimeDyld, PerfJITEvents
360365

361366
Lang Hames \
@@ -477,7 +482,7 @@ [email protected] (email), [lattner](https://github.com/lattner) (GitHub), clattn
477482

478483
Paul C. Anagnostopoulos ([email protected], [Paul-C-Anagnostopoulos](https://github.com/Paul-C-Anagnostopoulos)) -- TableGen \
479484
Justin Bogner ([email protected], [bogner](https://github.com/bogner)) -- SelectionDAG \
480-
Chandler Carruth ([email protected], [email protected], [chandlerc](https://github.com/chandlerc)) -- ADT, Support, Inlining \
485+
Chandler Carruth ([email protected], [email protected], [chandlerc](https://github.com/chandlerc)) -- ADT, Support, Inlining, CMake and library layering \
481486
Peter Collingbourne ([email protected], [pcc](https://github.com/pcc)) -- LTO \
482487
Evan Cheng ([email protected]) -- Parts of code generator not covered by someone else \
483488
Jake Ehrlich ([email protected], [jakehehrlich](https://github.com/jakehehrlich)) -- llvm-objcopy and ObjCopy library \

llvm/include/llvm/CodeGen/LiveRegMatrix.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ class LiveRegMatrix {
161161
/// Use MCRegUnitIterator to enumerate all regunits in the desired PhysReg.
162162
/// This returns a reference to an internal Query data structure that is only
163163
/// valid until the next query() call.
164-
LiveIntervalUnion::Query &query(const LiveRange &LR, MCRegister RegUnit);
164+
LiveIntervalUnion::Query &query(const LiveRange &LR, MCRegUnit RegUnit);
165165

166166
/// Directly access the live interval unions per regunit.
167167
/// This returns an array indexed by the regunit number.

llvm/lib/CodeGen/LiveRegMatrix.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ bool LiveRegMatrix::checkRegUnitInterference(const LiveInterval &VirtReg,
184184
}
185185

186186
LiveIntervalUnion::Query &LiveRegMatrix::query(const LiveRange &LR,
187-
MCRegister RegUnit) {
187+
MCRegUnit RegUnit) {
188188
LiveIntervalUnion::Query &Q = Queries[RegUnit];
189189
Q.init(UserTag, LR, Matrix[RegUnit]);
190190
return Q;
@@ -206,7 +206,7 @@ LiveRegMatrix::checkInterference(const LiveInterval &VirtReg,
206206

207207
// Check the matrix for virtual register interference.
208208
bool Interference = foreachUnit(TRI, VirtReg, PhysReg,
209-
[&](MCRegister Unit, const LiveRange &LR) {
209+
[&](MCRegUnit Unit, const LiveRange &LR) {
210210
return query(LR, Unit).checkInterference();
211211
});
212212
if (Interference)

mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h

Lines changed: 54 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,16 @@
1616

1717
namespace mlir {
1818

19-
/// Rewriting that replace SourceOp with a CallOp to `f32Func` or `f64Func` or
20-
/// `f32ApproxFunc` or `f16Func` depending on the element type and the
21-
/// fastMathFlag of that Op. The function declaration is added in case it was
22-
/// not added before.
19+
namespace {
20+
/// Detection trait tor the `getFastmath` instance method.
21+
template <typename T>
22+
using has_get_fastmath_t = decltype(std::declval<T>().getFastmath());
23+
} // namespace
24+
25+
/// Rewriting that replaces SourceOp with a CallOp to `f32Func` or `f64Func` or
26+
/// `f32ApproxFunc` or `f16Func` or `i32Type` depending on the element type and
27+
/// the fastMathFlag of that Op, if present. The function declaration is added
28+
/// in case it was not added before.
2329
///
2430
/// If the input values are of bf16 type (or f16 type if f16Func is empty), the
2531
/// value is first casted to f32, the function called and then the result casted
@@ -39,14 +45,22 @@ namespace mlir {
3945
///
4046
/// will be transformed into
4147
/// llvm.call @__nv_fast_expf(%arg_f32) : (f32) -> f32
48+
///
49+
/// Final example with NVVM:
50+
/// %pow_f32 = math.fpowi %arg_f32, %arg_i32
51+
///
52+
/// will be transformed into
53+
/// llvm.call @__nv_powif(%arg_f32, %arg_i32) : (f32, i32) -> f32
4254
template <typename SourceOp>
4355
struct OpToFuncCallLowering : public ConvertOpToLLVMPattern<SourceOp> {
4456
public:
4557
explicit OpToFuncCallLowering(const LLVMTypeConverter &lowering,
4658
StringRef f32Func, StringRef f64Func,
47-
StringRef f32ApproxFunc, StringRef f16Func)
59+
StringRef f32ApproxFunc, StringRef f16Func,
60+
StringRef i32Func = "")
4861
: ConvertOpToLLVMPattern<SourceOp>(lowering), f32Func(f32Func),
49-
f64Func(f64Func), f32ApproxFunc(f32ApproxFunc), f16Func(f16Func) {}
62+
f64Func(f64Func), f32ApproxFunc(f32ApproxFunc), f16Func(f16Func),
63+
i32Func(i32Func) {}
5064

5165
LogicalResult
5266
matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor,
@@ -76,9 +90,8 @@ struct OpToFuncCallLowering : public ConvertOpToLLVMPattern<SourceOp> {
7690

7791
Type resultType = castedOperands.front().getType();
7892
Type funcType = getFunctionType(resultType, castedOperands);
79-
StringRef funcName =
80-
getFunctionName(cast<LLVM::LLVMFunctionType>(funcType).getReturnType(),
81-
op.getFastmath());
93+
StringRef funcName = getFunctionName(
94+
cast<LLVM::LLVMFunctionType>(funcType).getReturnType(), op);
8295
if (funcName.empty())
8396
return failure();
8497

@@ -91,14 +104,15 @@ struct OpToFuncCallLowering : public ConvertOpToLLVMPattern<SourceOp> {
91104
return success();
92105
}
93106

107+
assert(callOp.getResult().getType().isF32() &&
108+
"only f32 types are supposed to be truncated back");
94109
Value truncated = rewriter.create<LLVM::FPTruncOp>(
95110
op->getLoc(), adaptor.getOperands().front().getType(),
96111
callOp.getResult());
97112
rewriter.replaceOp(op, {truncated});
98113
return success();
99114
}
100115

101-
private:
102116
Value maybeCast(Value operand, PatternRewriter &rewriter) const {
103117
Type type = operand.getType();
104118
if (!isa<Float16Type, BFloat16Type>(type))
@@ -117,38 +131,50 @@ struct OpToFuncCallLowering : public ConvertOpToLLVMPattern<SourceOp> {
117131
return LLVM::LLVMFunctionType::get(resultType, operandTypes);
118132
}
119133

120-
StringRef getFunctionName(Type type, arith::FastMathFlags flag) const {
121-
if (isa<Float16Type>(type))
122-
return f16Func;
123-
if (isa<Float32Type>(type)) {
124-
if (((uint32_t)arith::FastMathFlags::afn & (uint32_t)flag) &&
125-
!f32ApproxFunc.empty())
126-
return f32ApproxFunc;
127-
else
128-
return f32Func;
129-
}
130-
if (isa<Float64Type>(type))
131-
return f64Func;
132-
return "";
133-
}
134-
135134
LLVM::LLVMFuncOp appendOrGetFuncOp(StringRef funcName, Type funcType,
136135
Operation *op) const {
137136
using LLVM::LLVMFuncOp;
138137

139138
auto funcAttr = StringAttr::get(op->getContext(), funcName);
140-
Operation *funcOp = SymbolTable::lookupNearestSymbolFrom(op, funcAttr);
139+
auto funcOp =
140+
SymbolTable::lookupNearestSymbolFrom<LLVMFuncOp>(op, funcAttr);
141141
if (funcOp)
142-
return cast<LLVMFuncOp>(*funcOp);
142+
return funcOp;
143143

144-
mlir::OpBuilder b(op->getParentOfType<FunctionOpInterface>());
144+
auto parentFunc = op->getParentOfType<FunctionOpInterface>();
145+
assert(parentFunc && "expected there to be a parent function");
146+
OpBuilder b(parentFunc);
145147
return b.create<LLVMFuncOp>(op->getLoc(), funcName, funcType);
146148
}
147149

150+
StringRef getFunctionName(Type type, SourceOp op) const {
151+
bool useApprox = false;
152+
if constexpr (llvm::is_detected<has_get_fastmath_t, SourceOp>::value) {
153+
arith::FastMathFlags flag = op.getFastmath();
154+
useApprox = ((uint32_t)arith::FastMathFlags::afn & (uint32_t)flag) &&
155+
!f32ApproxFunc.empty();
156+
}
157+
158+
if (isa<Float16Type>(type))
159+
return f16Func;
160+
if (isa<Float32Type>(type)) {
161+
if (useApprox)
162+
return f32ApproxFunc;
163+
return f32Func;
164+
}
165+
if (isa<Float64Type>(type))
166+
return f64Func;
167+
168+
if (type.isInteger(32))
169+
return i32Func;
170+
return "";
171+
}
172+
148173
const std::string f32Func;
149174
const std::string f64Func;
150175
const std::string f32ApproxFunc;
151176
const std::string f16Func;
177+
const std::string i32Func;
152178
};
153179

154180
} // namespace mlir

mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,22 @@ static void populateOpPatterns(const LLVMTypeConverter &converter,
446446
f32ApproxFunc, f16Func);
447447
}
448448

449+
template <typename OpTy>
450+
static void populateIntOpPatterns(const LLVMTypeConverter &converter,
451+
RewritePatternSet &patterns,
452+
StringRef i32Func) {
453+
patterns.add<ScalarizeVectorOpLowering<OpTy>>(converter);
454+
patterns.add<OpToFuncCallLowering<OpTy>>(converter, "", "", "", "", i32Func);
455+
}
456+
457+
template <typename OpTy>
458+
static void populateFloatIntOpPatterns(const LLVMTypeConverter &converter,
459+
RewritePatternSet &patterns,
460+
StringRef f32Func, StringRef f64Func) {
461+
patterns.add<ScalarizeVectorOpLowering<OpTy>>(converter);
462+
patterns.add<OpToFuncCallLowering<OpTy>>(converter, f32Func, f64Func, "", "");
463+
}
464+
449465
void mlir::populateGpuSubgroupReduceOpLoweringPattern(
450466
const LLVMTypeConverter &converter, RewritePatternSet &patterns) {
451467
patterns.add<GPUSubgroupReduceOpLowering>(converter);
@@ -509,6 +525,7 @@ void mlir::populateGpuToNVVMConversionPatterns(
509525

510526
populateOpPatterns<arith::RemFOp>(converter, patterns, "__nv_fmodf",
511527
"__nv_fmod");
528+
populateIntOpPatterns<math::AbsIOp>(converter, patterns, "__nv_abs");
512529
populateOpPatterns<math::AbsFOp>(converter, patterns, "__nv_fabsf",
513530
"__nv_fabs");
514531
populateOpPatterns<math::AcosOp>(converter, patterns, "__nv_acosf",
@@ -555,6 +572,8 @@ void mlir::populateGpuToNVVMConversionPatterns(
555572
"__nv_log2", "__nv_fast_log2f");
556573
populateOpPatterns<math::PowFOp>(converter, patterns, "__nv_powf", "__nv_pow",
557574
"__nv_fast_powf");
575+
populateFloatIntOpPatterns<math::FPowIOp>(converter, patterns, "__nv_powif",
576+
"__nv_powi");
558577
populateOpPatterns<math::RoundOp>(converter, patterns, "__nv_roundf",
559578
"__nv_round");
560579
populateOpPatterns<math::RoundEvenOp>(converter, patterns, "__nv_rintf",

mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1033,3 +1033,27 @@ module attributes {transform.with_named_sequence} {
10331033
transform.yield
10341034
}
10351035
}
1036+
1037+
1038+
gpu.module @test_module_52 {
1039+
// CHECK: llvm.func @__nv_abs(i32) -> i32
1040+
// CHECK-LABEL: func @gpu_abs
1041+
func.func @gpu_abs(%arg_i32 : i32) -> (i32) {
1042+
%result32 = math.absi %arg_i32 : i32
1043+
// CHECK: llvm.call @__nv_abs(%{{.*}}) : (i32) -> i32
1044+
func.return %result32 : i32
1045+
}
1046+
}
1047+
1048+
gpu.module @test_module_53 {
1049+
// CHECK: llvm.func @__nv_powif(f32, i32) -> f32
1050+
// CHECK: llvm.func @__nv_powi(f64, i32) -> f64
1051+
// CHECK-LABEL: func @gpu_powi
1052+
func.func @gpu_powi(%arg_f32 : f32, %arg_f64 : f64, %arg_i32 : i32) -> (f32, f64) {
1053+
%result32 = math.fpowi %arg_f32, %arg_i32 : f32, i32
1054+
// CHECK: llvm.call @__nv_powif(%{{.*}}, %{{.*}}) : (f32, i32) -> f32
1055+
%result64 = math.fpowi %arg_f64, %arg_i32 : f64, i32
1056+
// CHECK: llvm.call @__nv_powi(%{{.*}}, %{{.*}}) : (f64, i32) -> f64
1057+
func.return %result32, %result64 : f32, f64
1058+
}
1059+
}

0 commit comments

Comments
 (0)