Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions clang/lib/CIR/CodeGen/CIRGenExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1189,9 +1189,20 @@ LValue CIRGenFunction::emitExtVectorElementExpr(const ExtVectorElementExpr *e) {
base = emitLValue(e->getBase());
} else {
// Otherwise, the base is a normal rvalue (as in (V+V).x), emit it as such.
cgm.errorNYI(e->getSourceRange(),
"emitExtVectorElementExpr: base is a normal rvalue");
return {};
assert(e->getBase()->getType()->isVectorType() &&
"Result must be a vector");
mlir::Value vec = emitScalarExpr(e->getBase());

// Store the vector to memory (because LValue wants an address).
QualType baseTy = e->getBase()->getType();
Address vecMem = createMemTemp(baseTy, vec.getLoc(), "tmp");
if (baseTy->isExtVectorBoolType()) {
cgm.errorNYI(e->getSourceRange(),
"emitExtVectorElementExpr: VectorBoolType");
return {};
}
builder.createStore(vec.getLoc(), vec, vecMem);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Classic codegen has some recently added code here to handle a vector of bools by zero-extending. See #123977

We should update this to handle that case.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added NYI because the Vector of bool is not supported yet

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added NYI because the Vector of bool is not supported yet

I guess what we need here is to substitute the NYI skeleton for checking on isPackedVectorBoolType in place of isExtVectorBoolType, but if that needs more work I'm fine if that comes as a next PR.

base = makeAddrLValue(vecMem, baseTy, AlignmentSource::Decl);
}

QualType type =
Expand Down
144 changes: 144 additions & 0 deletions clang/test/CIR/CodeGen/vector-ext-element.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,3 +171,147 @@ void element_expr_from_pointer_with_vec_result() {
// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[TMP_A_PTR]], align 16
// OGCG: %[[C_VALUE:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// OGCG: store <4 x i32> %[[C_VALUE]], ptr %[[C_ADDR]], align 16

void element_expr_from_rvalue() {
vi4 a;
vi4 b;
int x = (a + b).x;
int y = (a + b).y;
}

// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"]
// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b"]
// CIR: %[[X_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init]
// CIR: %[[TMP_1_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["tmp"]
// CIR: %[[Y_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["y", init]
// CIR: %[[TMP_2_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["tmp"]
// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
// CIR: %[[TMP_B:.*]] = cir.load {{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
// CIR: %[[ADD_A_B:.*]] = cir.binop(add, %[[TMP_A]], %[[TMP_B]]) : !cir.vector<4 x !s32i>
// CIR: cir.store {{.*}} %[[ADD_A_B]], %[[TMP_1_ADDR:.*]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
// CIR: %[[TMP_1:.*]] = cir.load {{.*}} %[[TMP_1_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
// CIR: %[[CONST_0:.*]] = cir.const #cir.int<0> : !s64i
// CIR: %[[ELEM_0:.*]] = cir.vec.extract %[[TMP_1]][%[[CONST_0]] : !s64i] : !cir.vector<4 x !s32i>
// CIR: cir.store {{.*}} %[[ELEM_0]], %[[X_ADDR]] : !s32i, !cir.ptr<!s32i>
// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
// CIR: %[[TMP_B:.*]] = cir.load {{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
// CIR: %[[ADD_A_B:.*]] = cir.binop(add, %[[TMP_A]], %[[TMP_B]]) : !cir.vector<4 x !s32i>
// CIR: cir.store {{.*}} %[[ADD_A_B]], %[[TMP_2_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
// CIR: %[[TMP_2:.*]] = cir.load {{.*}} %[[TMP_2_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
// CIR: %[[CONST_1:.*]] = cir.const #cir.int<1> : !s64i
// CIR: %[[ELEM_1:.*]] = cir.vec.extract %[[TMP_2]][%[[CONST_1]] : !s64i] : !cir.vector<4 x !s32i>
// CIR: cir.store {{.*}} %[[ELEM_1]], %[[Y_ADDR]] : !s32i, !cir.ptr<!s32i>

// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
// LLVM: %[[B_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
// LLVM: %[[X_ADDR:.*]] = alloca i32, i64 1, align 4
// LLVM: %[[TMP_1_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
// LLVM: %[[Y_ADDR:.*]] = alloca i32, i64 1, align 4
// LLVM: %[[TMP_2_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
// LLVM: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
// LLVM: %[[ADD_A_B:.*]] = add <4 x i32> %[[TMP_A]], %[[TMP_B]]
// LLVM: store <4 x i32> %[[ADD_A_B]], ptr %[[TMP_1_ADDR]], align 16
// LLVM: %[[TMP_1:.*]] = load <4 x i32>, ptr %[[TMP_1_ADDR]], align 16
// LLVM: %[[ELEM_0:.*]] = extractelement <4 x i32> %[[TMP_1]], i64 0
// LLVM: store i32 %[[ELEM_0]], ptr %[[X_ADDR]], align 4
// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
// LLVM: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
// LLVM: %[[ADD_A_B:.*]] = add <4 x i32> %[[TMP_A]], %[[TMP_B]]
// LLVM: store <4 x i32> %[[ADD_A_B]], ptr %[[TMP_2_ADDR]], align 16
// LLVM: %[[TMP_2:.*]] = load <4 x i32>, ptr %[[TMP_2_ADDR]], align 16
// LLVM: %[[ELEM_1:.*]] = extractelement <4 x i32> %[[TMP_2]], i64 1
// LLVM: store i32 %[[ELEM_1]], ptr %[[Y_ADDR]], align 4

// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16
// OGCG: %[[B_ADDR:.*]] = alloca <4 x i32>, align 16
// OGCG: %[[X_ADDR:.*]] = alloca i32, align 4
// OGCG: %[[TMP_1_ADDR:.*]] = alloca <4 x i32>, align 16
// OGCG: %[[Y_ADDR:.*]] = alloca i32, align 4
// OGCG: %[[TMP_2_ADDR:.*]] = alloca <4 x i32>, align 16
// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
// OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
// OGCG: %[[ADD_A_B:.*]] = add <4 x i32> %[[TMP_A]], %[[TMP_B]]
// OGCG: store <4 x i32> %[[ADD_A_B]], ptr %[[TMP_1_ADDR]], align 16
// OGCG: %[[TMP_1:.*]] = load <4 x i32>, ptr %[[TMP_1_ADDR]], align 16
// OGCG: %[[ELEM_0:.*]] = extractelement <4 x i32> %[[TMP_1]], i64 0
// OGCG: store i32 %[[ELEM_0]], ptr %[[X_ADDR]], align 4
// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
// OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
// OGCG: %[[ADD_A_B:.*]] = add <4 x i32> %[[TMP_A]], %[[TMP_B]]
// OGCG: store <4 x i32> %[[ADD_A_B]], ptr %[[TMP_2_ADDR]], align 16
// OGCG: %[[TMP_2:.*]] = load <4 x i32>, ptr %[[TMP_2_ADDR]], align 16
// OGCG: %[[ELEM_1:.*]] = extractelement <4 x i32> %[[TMP_2]], i64 1
// OGCG: store i32 %[[ELEM_1]], ptr %[[Y_ADDR]], align 4

void element_expr_from_rvalue_with_vec_result() {
vi4 a;
vi4 b;
vi2 c = (a + b).xy;
vi4 d = (a + b).wzyx;
}

// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"]
// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b"]
// CIR: %[[C_ADDR:.*]] = cir.alloca !cir.vector<2 x !s32i>, !cir.ptr<!cir.vector<2 x !s32i>>, ["c", init]
// CIR: %[[TMP_1_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["tmp"]
// CIR: %[[D_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["d", init]
// CIR: %[[TMP_2_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["tmp"]
// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
// CIR: %[[TMP_B:.*]] = cir.load {{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
// CIR: %[[ADD_A_B:.*]] = cir.binop(add, %[[TMP_A]], %[[TMP_B]]) : !cir.vector<4 x !s32i>
// CIR: cir.store {{.*}} %[[ADD_A_B]], %[[TMP_1_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
// CIR: %[[TMP_1:.*]] = cir.load {{.*}} %[[TMP_1_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<4 x !s32i>
// CIR: %[[C_VALUE:.*]] = cir.vec.shuffle(%[[TMP_1]], %[[POISON]] : !cir.vector<4 x !s32i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<2 x !s32i>
// CIR: cir.store {{.*}} %[[C_VALUE]], %[[C_ADDR]] : !cir.vector<2 x !s32i>, !cir.ptr<!cir.vector<2 x !s32i>>
// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
// CIR: %[[TMP_B:.*]] = cir.load {{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
// CIR: %[[ADD_A_B:.*]] = cir.binop(add, %[[TMP_A]], %[[TMP_B]]) : !cir.vector<4 x !s32i>
// CIR: cir.store {{.*}} %[[ADD_A_B]], %[[TMP_2_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
// CIR: %[[TMP_2:.*]] = cir.load {{.*}} %[[TMP_2_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<4 x !s32i>
// CIR: %[[D_VALUE:.*]] = cir.vec.shuffle(%[[TMP_2]], %[[POISON]] : !cir.vector<4 x !s32i>) [#cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<1> : !s32i, #cir.int<0> : !s32i] : !cir.vector<4 x !s32i>
// CIR: cir.store {{.*}} %[[D_VALUE]], %[[D_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>

// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
// LLVM: %[[B_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
// LLVM: %[[C_ADDR:.*]] = alloca <2 x i32>, i64 1, align 8
// LLVM: %[[TMP_1_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
// LLVM: %[[D_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
// LLVM: %[[TMP_2_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
// LLVM: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
// LLVM: %[[ADD_A_B:.*]] = add <4 x i32> %[[TMP_A]], %[[TMP_B]]
// LLVM: store <4 x i32> %[[ADD_A_B]], ptr %[[TMP_1_ADDR]], align 16
// LLVM: %[[TMP_1:.*]] = load <4 x i32>, ptr %[[TMP_1_ADDR]], align 16
// LLVM: %[[C_VALUE:.*]] = shufflevector <4 x i32> %[[TMP_1]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
// LLVM: store <2 x i32> %[[C_VALUE]], ptr %[[C_ADDR]], align 8
// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
// LLVM: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
// LLVM: %[[ADD_A_B:.*]] = add <4 x i32> %[[TMP_A]], %[[TMP_B]]
// LLVM: store <4 x i32> %[[ADD_A_B]], ptr %[[TMP_2_ADDR]], align 16
// LLVM: %[[TMP_2:.*]] = load <4 x i32>, ptr %[[TMP_2_ADDR]], align 16
// LLVM: %[[D_VALUE:.*]] = shufflevector <4 x i32> %[[TMP_2]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// LLVM: store <4 x i32> %[[D_VALUE]], ptr %[[D_ADDR]], align 16

// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16
// OGCG: %[[B_ADDR:.*]] = alloca <4 x i32>, align 16
// OGCG: %[[C_ADDR:.*]] = alloca <2 x i32>, align 8
// OGCG: %[[TMP_1_ADDR:.*]] = alloca <4 x i32>, align 16
// OGCG: %[[D_ADDR:.*]] = alloca <4 x i32>, align 16
// OGCG: %[[TMP_2_ADDR:.*]] = alloca <4 x i32>, align 16
// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
// OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
// OGCG: %[[ADD_A_B:.*]] = add <4 x i32> %[[TMP_A]], %[[TMP_B]]
// OGCG: store <4 x i32> %[[ADD_A_B]], ptr %[[TMP_1_ADDR]], align 16
// OGCG: %[[TMP_1:.*]] = load <4 x i32>, ptr %[[TMP_1_ADDR]], align 16
// OGCG: %[[C_VALUE:.*]] = shufflevector <4 x i32> %[[TMP_1]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
// OGCG: store <2 x i32> %[[C_VALUE]], ptr %[[C_ADDR]], align 8
// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
// OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
// OGCG: %[[ADD_A_B:.*]] = add <4 x i32> %[[TMP_A]], %[[TMP_B]]
// OGCG: store <4 x i32> %[[ADD_A_B]], ptr %[[TMP_2_ADDR]], align 16
// OGCG: %[[TMP_2:.*]] = load <4 x i32>, ptr %[[TMP_2_ADDR]], align 16
// OGCG: %[[D_VALUE:.*]] = shufflevector <4 x i32> %[[TMP_2]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// OGCG: store <4 x i32> %[[D_VALUE]], ptr %[[D_ADDR]], align 16
Loading