Skip to content

Commit 0c9cee9

Browse files
authored
Merge branch 'main' into optimize_transpose
2 parents 25277bb + fe5c3cb commit 0c9cee9

File tree

53 files changed

+1531
-565
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

53 files changed

+1531
-565
lines changed

clang/include/clang/CIR/Dialect/IR/CIROps.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4191,6 +4191,16 @@ def CIR_CosOp : CIR_UnaryFPToFPBuiltinOp<"cos", "CosOp"> {
41914191
}];
41924192
}
41934193

4194+
def CIR_ExpOp : CIR_UnaryFPToFPBuiltinOp<"exp", "ExpOp"> {
4195+
let summary = "Computes the floating-point base-e exponential value";
4196+
let description = [{
4197+
`cir.exp` computes the exponential of a floating-point operand and returns
4198+
a result of the same type.
4199+
4200+
Floating-point exceptions are ignored, and it does not set `errno`.
4201+
}];
4202+
}
4203+
41944204
def CIR_FAbsOp : CIR_UnaryFPToFPBuiltinOp<"fabs", "FAbsOp"> {
41954205
let summary = "Computes the floating-point absolute value";
41964206
let description = [{

clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,17 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
222222
assert(!cir::MissingFeatures::fastMathFlags());
223223
return emitUnaryMaybeConstrainedFPBuiltin<cir::CeilOp>(*this, *e);
224224

225+
case Builtin::BIexp:
226+
case Builtin::BIexpf:
227+
case Builtin::BIexpl:
228+
case Builtin::BI__builtin_exp:
229+
case Builtin::BI__builtin_expf:
230+
case Builtin::BI__builtin_expf16:
231+
case Builtin::BI__builtin_expl:
232+
case Builtin::BI__builtin_expf128:
233+
assert(!cir::MissingFeatures::fastMathFlags());
234+
return emitUnaryMaybeConstrainedFPBuiltin<cir::ExpOp>(*this, *e);
235+
225236
case Builtin::BIfabs:
226237
case Builtin::BIfabsf:
227238
case Builtin::BIfabsl:

clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -343,8 +343,8 @@ class AggExprEmitter : public StmtVisitor<AggExprEmitter> {
343343
cgf.cgm.errorNYI(e->getSourceRange(), "AggExprEmitter: VisitNoInitExpr");
344344
}
345345
void VisitCXXDefaultArgExpr(CXXDefaultArgExpr *dae) {
346-
cgf.cgm.errorNYI(dae->getSourceRange(),
347-
"AggExprEmitter: VisitCXXDefaultArgExpr");
346+
CIRGenFunction::CXXDefaultArgExprScope scope(cgf, dae);
347+
Visit(dae->getExpr());
348348
}
349349
void VisitCXXInheritedCtorInitExpr(const CXXInheritedCtorInitExpr *e) {
350350
cgf.cgm.errorNYI(e->getSourceRange(),

clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,14 @@ mlir::LogicalResult CIRToLLVMCosOpLowering::matchAndRewrite(
194194
return mlir::success();
195195
}
196196

197+
mlir::LogicalResult CIRToLLVMExpOpLowering::matchAndRewrite(
198+
cir::ExpOp op, OpAdaptor adaptor,
199+
mlir::ConversionPatternRewriter &rewriter) const {
200+
mlir::Type resTy = typeConverter->convertType(op.getType());
201+
rewriter.replaceOpWithNewOp<mlir::LLVM::ExpOp>(op, resTy, adaptor.getSrc());
202+
return mlir::success();
203+
}
204+
197205
static mlir::Value getLLVMIntCast(mlir::ConversionPatternRewriter &rewriter,
198206
mlir::Value llvmSrc, mlir::Type llvmDstIntTy,
199207
bool isUnsigned, uint64_t cirSrcWidth,

clang/lib/Driver/Driver.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3857,6 +3857,9 @@ class OffloadingActionBuilder final {
38573857
/// Flag set to true if all valid builders allow file bundling/unbundling.
38583858
bool CanUseBundler;
38593859

3860+
/// Flag set to false if an argument turns off bundling.
3861+
bool ShouldUseBundler;
3862+
38603863
public:
38613864
OffloadingActionBuilder(Compilation &C, DerivedArgList &Args,
38623865
const Driver::InputList &Inputs)
@@ -3891,6 +3894,9 @@ class OffloadingActionBuilder final {
38913894
}
38923895
CanUseBundler =
38933896
ValidBuilders && ValidBuilders == ValidBuildersSupportingBundling;
3897+
3898+
ShouldUseBundler = Args.hasFlag(options::OPT_gpu_bundle_output,
3899+
options::OPT_no_gpu_bundle_output, true);
38943900
}
38953901

38963902
~OffloadingActionBuilder() {
@@ -4042,11 +4048,11 @@ class OffloadingActionBuilder final {
40424048
SB->appendTopLevelActions(OffloadAL);
40434049
}
40444050

4045-
// If we can use the bundler, replace the host action by the bundling one in
4046-
// the resulting list. Otherwise, just append the device actions. For
4047-
// device only compilation, HostAction is a null pointer, therefore only do
4048-
// this when HostAction is not a null pointer.
4049-
if (CanUseBundler && HostAction &&
4051+
// If we can and should use the bundler, replace the host action by the
4052+
// bundling one in the resulting list. Otherwise, just append the device
4053+
// actions. For device only compilation, HostAction is a null pointer,
4054+
// therefore only do this when HostAction is not a null pointer.
4055+
if (CanUseBundler && ShouldUseBundler && HostAction &&
40504056
HostAction->getType() != types::TY_Nothing && !OffloadAL.empty()) {
40514057
// Add the host action to the list in order to create the bundling action.
40524058
OffloadAL.push_back(HostAction);

clang/test/Analysis/LifetimeSafety/CMakeLists.txt

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,21 @@ set(LIFETIME_BENCHMARK_REQUIREMENTS
1515
set(LIFETIME_BENCHMARK_OUTPUT_DIR
1616
"${CMAKE_CURRENT_BINARY_DIR}/benchmark_results")
1717

18+
if(WIN32)
19+
set(LIFETIME_BENCHMARK_VENV_PYTHON_EXECUTABLE
20+
"${LIFETIME_BENCHMARK_VENV_DIR}/Scripts/python")
21+
else()
22+
set(LIFETIME_BENCHMARK_VENV_PYTHON_EXECUTABLE
23+
"${LIFETIME_BENCHMARK_VENV_DIR}/bin/python")
24+
endif()
1825

1926
if(EXISTS ${LIFETIME_BENCHMARK_SCRIPT} AND EXISTS ${LIFETIME_BENCHMARK_REQUIREMENTS})
2027

2128
# Set up the virtual environment and install packages
2229
add_custom_command(
2330
OUTPUT ${LIFETIME_BENCHMARK_VENV_DIR}/pyvenv.cfg
2431
COMMAND ${Python3_EXECUTABLE} -m venv ${LIFETIME_BENCHMARK_VENV_DIR}
25-
COMMAND ${LIFETIME_BENCHMARK_VENV_DIR}/bin/python -m pip install -r ${LIFETIME_BENCHMARK_REQUIREMENTS}
32+
COMMAND ${LIFETIME_BENCHMARK_VENV_PYTHON_EXECUTABLE} -m pip install -r ${LIFETIME_BENCHMARK_REQUIREMENTS}
2633
DEPENDS ${LIFETIME_BENCHMARK_REQUIREMENTS}
2734
COMMENT "Creating Python virtual environment and installing dependencies for benchmark..."
2835
)
@@ -32,7 +39,7 @@ if(EXISTS ${LIFETIME_BENCHMARK_SCRIPT} AND EXISTS ${LIFETIME_BENCHMARK_REQUIREME
3239

3340
# Main benchmark target
3441
add_custom_target(benchmark_lifetime_safety_analysis
35-
COMMAND ${LIFETIME_BENCHMARK_VENV_DIR}/bin/python ${LIFETIME_BENCHMARK_SCRIPT}
42+
COMMAND ${LIFETIME_BENCHMARK_VENV_PYTHON_EXECUTABLE} ${LIFETIME_BENCHMARK_SCRIPT}
3643
--clang-binary ${LLVM_BINARY_DIR}/bin/clang
3744
--output-dir ${LIFETIME_BENCHMARK_OUTPUT_DIR}
3845

clang/test/CIR/CodeGen/builtins-floating-point.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,24 @@ float ceil(float f) {
2525
// LLVM: %{{.*}} = call float @llvm.ceil.f32(float %{{.*}})
2626
// OGCG: %{{.*}} = call float @llvm.ceil.f32(float %{{.*}})
2727
}
28+
29+
float expf(float f) {
30+
return __builtin_expf(f);
31+
// CIR: %{{.*}} = cir.exp {{.*}} : !cir.float
32+
// LLVM: %{{.*}} = call float @llvm.exp.f32(float %{{.*}})
33+
// OGCG: %{{.*}} = call float @llvm.exp.f32(float %{{.*}})
34+
}
35+
36+
double exp(double f) {
37+
return __builtin_exp(f);
38+
// CIR: %{{.*}} = cir.exp {{.*}} : !cir.double
39+
// LLVM: %{{.*}} = call double @llvm.exp.f64(double %{{.*}})
40+
// OGCG: %{{.*}} = call double @llvm.exp.f64(double %{{.*}})
41+
}
42+
43+
long double expl(long double f) {
44+
return __builtin_expl(f);
45+
// CIR: %{{.*}} = cir.exp {{.*}} : !cir.long_double<!cir.f128>
46+
// LLVM: %{{.*}} = call fp128 @llvm.exp.f128(fp128 %{{.*}})
47+
// OGCG: %{{.*}} = call fp128 @llvm.exp.f128(fp128 %{{.*}})
48+
}

clang/test/CIR/CodeGen/struct.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,3 +344,47 @@ void struct_with_const_member_expr() {
344344
// OGCG: %[[BF_SET:.*]] = or i8 %[[BF_CLEAR]], 0
345345
// OGCG: store i8 %[[BF_SET]], ptr %[[REF_ADDR]], align 4
346346
// OGCG: store i32 0, ptr %[[A_ADDR]], align 4
347+
348+
void function_arg_with_default_value(CompleteS a = {1, 2}) {}
349+
350+
// CIR: %[[ARG_ADDR:.*]] = cir.alloca !rec_CompleteS, !cir.ptr<!rec_CompleteS>, ["a", init]
351+
// CIR: cir.store %{{.*}}, %[[ARG_ADDR]] : !rec_CompleteS, !cir.ptr<!rec_CompleteS>
352+
353+
// LLVM: %[[ARG_ADDR:.*]] = alloca %struct.CompleteS, i64 1, align 4
354+
// LLVM: store %struct.CompleteS %{{.*}}, ptr %[[ARG_ADDR]], align 4
355+
356+
// OGCG: %[[ARG_ADDR:.*]] = alloca %struct.CompleteS, align 4
357+
// OGCG: store i64 %{{.*}}, ptr %[[ARG_ADDR]], align 4
358+
359+
void calling_function_with_default_values() {
360+
function_arg_with_default_value();
361+
}
362+
363+
// CIR: %[[AGG_ADDR:.*]] = cir.alloca !rec_CompleteS, !cir.ptr<!rec_CompleteS>, ["agg.tmp0"]
364+
// CIR: %[[ELEM_0_PTR:.*]] = cir.get_member %[[AGG_ADDR]][0] {name = "a"} : !cir.ptr<!rec_CompleteS> -> !cir.ptr<!s32i>
365+
// CIR: %[[CONST_1:.*]] = cir.const #cir.int<1> : !s32i
366+
// CIR: cir.store{{.*}} %[[CONST_1]], %[[ELEM_0_PTR]] : !s32i, !cir.ptr<!s32i>
367+
// CIR: %[[ELEM_1_PTR:.*]] = cir.get_member %[[AGG_ADDR]][1] {name = "b"} : !cir.ptr<!rec_CompleteS> -> !cir.ptr<!s8i>
368+
// CIR: %[[CONST_2:.*]] = cir.const #cir.int<2> : !s32i
369+
// CIR: %[[CONST_2_I8:.*]] = cir.cast integral %[[CONST_2]] : !s32i -> !s8i
370+
// CIR: cir.store{{.*}} %[[CONST_2_I8]], %[[ELEM_1_PTR]] : !s8i, !cir.ptr<!s8i>
371+
// CIR: %[[TMP_AGG:.*]] = cir.load{{.*}} %[[AGG_ADDR]] : !cir.ptr<!rec_CompleteS>, !rec_CompleteS
372+
// CIR: cir.call @_Z31function_arg_with_default_value9CompleteS(%[[TMP_AGG]]) : (!rec_CompleteS) -> ()
373+
374+
// TODO(CIR): the difference between the CIR-lowered LLVM output and OGCG is due to the lack of calling convention lowering.
375+
376+
// LLVM: %[[AGG_ADDR:.*]] = alloca %struct.CompleteS, i64 1, align 4
377+
// LLVM: %[[ELEM_0_PTR:.*]] = getelementptr %struct.CompleteS, ptr %[[AGG_ADDR]], i32 0, i32 0
378+
// LLVM: store i32 1, ptr %[[ELEM_0_PTR]], align 4
379+
// LLVM: %[[ELEM_1_PTR:.*]] = getelementptr %struct.CompleteS, ptr %[[AGG_ADDR]], i32 0, i32 1
380+
// LLVM: store i8 2, ptr %[[ELEM_1_PTR]], align 4
381+
// LLVM: %[[TMP_AGG:.*]] = load %struct.CompleteS, ptr %[[AGG_ADDR]], align 4
382+
// LLVM: call void @_Z31function_arg_with_default_value9CompleteS(%struct.CompleteS %[[TMP_AGG]])
383+
384+
// OGCG: %[[AGG_ADDR:.*]] = alloca %struct.CompleteS, align 4
385+
// OGCG: %[[ELEM_0_PTR:.*]] = getelementptr inbounds nuw %struct.CompleteS, ptr %[[AGG_ADDR]], i32 0, i32 0
386+
// OGCG: store i32 1, ptr %[[ELEM_0_PTR]], align 4
387+
// OGCG: %[[ELEM_1_PTR:.*]] = getelementptr inbounds nuw %struct.CompleteS, ptr %[[AGG_ADDR]], i32 0, i32 1
388+
// OGCG: store i8 2, ptr %[[ELEM_1_PTR]], align 4
389+
// OGCG: %[[TMP_AGG:.*]] = load i64, ptr %[[AGG_ADDR]], align 4
390+
// OGCG: call void @_Z31function_arg_with_default_value9CompleteS(i64 %[[TMP_AGG]])
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// RUN: %clang -ccc-print-phases -c -emit-llvm \
2+
// RUN: --offload-arch=gfx900,gfx1030 -O3 -x hip %s \
3+
// RUN: 2>&1 | FileCheck %s --check-prefix=BUNDLE
4+
5+
// RUN: %clang -ccc-print-phases -c -emit-llvm \
6+
// RUN: --gpu-bundle-output --offload-arch=gfx900,gfx1030 -O3 -x hip %s \
7+
// RUN: 2>&1 | FileCheck %s --check-prefix=BUNDLE
8+
9+
// RUN: %clang -ccc-print-phases -c -emit-llvm \
10+
// RUN: --no-gpu-bundle-output --offload-arch=gfx900,gfx1030 -O3 -x hip %s \
11+
// RUN: 2>&1 | FileCheck %s --check-prefixes=COMPILER,GFX1030,GFX900,OFFLOAD,NOBUNDLE
12+
13+
// BUNDLE: clang-offload-bundler
14+
// NOBUNDLE-NOT: clang-offload-bundler
15+
16+
// COM: sanity checks
17+
// COMPILER: compiler
18+
// GFX1030: (device-hip, gfx1030)
19+
// GFX900: (device-hip, gfx900)
20+
// OFFLOAD: offload
21+
22+
int square(int num) {
23+
return num * num;
24+
}

libc/config/linux/aarch64/entrypoints.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -325,8 +325,7 @@ set(TARGET_LIBC_ENTRYPOINTS
325325
libc.src.unistd.dup2
326326
libc.src.unistd.dup3
327327
libc.src.unistd.execve
328-
# Disabled while SYS_faccessat2 is unavailable on the buildbot.
329-
# libc.src.unistd.faccessat
328+
libc.src.unistd.faccessat
330329
libc.src.unistd.fchdir
331330
libc.src.unistd.fpathconf
332331
libc.src.unistd.fsync

0 commit comments

Comments
 (0)