[CIR] ExtVectorElementExpr with rvalue base #168260

AmrDeveloper · 2025-11-16T09:33:22Z

Upstream ExtVectorElementExpr with rvalue base

llvmbot · 2025-11-16T09:33:54Z

@llvm/pr-subscribers-clangir

@llvm/pr-subscribers-clang

Author: Amr Hesham (AmrDeveloper)

Changes

Upstream ExtVectorElementExpr with rvalue base

Full diff: https://github.com/llvm/llvm-project/pull/168260.diff

2 Files Affected:

(modified) clang/lib/CIR/CodeGen/CIRGenExpr.cpp (+9-3)
(modified) clang/test/CIR/CodeGen/vector-ext-element.cpp (+144)

diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index d35bb0af0de14..25af52e68856c 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -1189,9 +1189,15 @@ LValue CIRGenFunction::emitExtVectorElementExpr(const ExtVectorElementExpr *e) {
     base = emitLValue(e->getBase());
   } else {
     // Otherwise, the base is a normal rvalue (as in (V+V).x), emit it as such.
-    cgm.errorNYI(e->getSourceRange(),
-                 "emitExtVectorElementExpr: base is a normal rvalue");
-    return {};
+    assert(e->getBase()->getType()->isVectorType() &&
+           "Result must be a vector");
+    mlir::Value vec = emitScalarExpr(e->getBase());
+
+    // Store the vector to memory (because LValue wants an address).
+    QualType baseTy = e->getBase()->getType();
+    Address vecMem = createMemTemp(baseTy, vec.getLoc(), "tmp");
+    builder.createStore(vec.getLoc(), vec, vecMem);
+    base = makeAddrLValue(vecMem, baseTy, AlignmentSource::Decl);
   }
 
   QualType type =
diff --git a/clang/test/CIR/CodeGen/vector-ext-element.cpp b/clang/test/CIR/CodeGen/vector-ext-element.cpp
index 18542ee40b325..87a29006d9970 100644
--- a/clang/test/CIR/CodeGen/vector-ext-element.cpp
+++ b/clang/test/CIR/CodeGen/vector-ext-element.cpp
@@ -171,3 +171,147 @@ void element_expr_from_pointer_with_vec_result() {
 // OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[TMP_A_PTR]], align 16
 // OGCG: %[[C_VALUE:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 // OGCG: store <4 x i32> %[[C_VALUE]], ptr %[[C_ADDR]], align 16
+
+void element_expr_from_rvalue() {
+  vi4 a;
+  vi4 b;
+  int x = (a + b).x;
+  int y = (a + b).y;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b"]
+// CIR: %[[X_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init]
+// CIR: %[[TMP_1_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["tmp"]
+// CIR: %[[Y_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["y", init]
+// CIR: %[[TMP_2_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["tmp"]
+// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: %[[TMP_B:.*]] = cir.load {{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: %[[ADD_A_B:.*]] = cir.binop(add, %[[TMP_A]], %[[TMP_B]]) : !cir.vector<4 x !s32i>
+// CIR: cir.store {{.*}} %[[ADD_A_B]], %[[TMP_1_ADDR:.*]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
+// CIR: %[[TMP_1:.*]] = cir.load {{.*}} %[[TMP_1_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: %[[CONST_0:.*]] = cir.const #cir.int<0> : !s64i
+// CIR: %[[ELEM_0:.*]] = cir.vec.extract %[[TMP_1]][%[[CONST_0]] : !s64i] : !cir.vector<4 x !s32i>
+// CIR: cir.store {{.*}} %[[ELEM_0]], %[[X_ADDR]] : !s32i, !cir.ptr<!s32i>
+// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: %[[TMP_B:.*]] = cir.load {{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: %[[ADD_A_B:.*]] = cir.binop(add, %[[TMP_A]], %[[TMP_B]]) : !cir.vector<4 x !s32i>
+// CIR: cir.store {{.*}} %[[ADD_A_B]], %[[TMP_2_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
+// CIR: %[[TMP_2:.*]] = cir.load {{.*}} %[[TMP_2_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: %[[CONST_1:.*]] = cir.const #cir.int<1> : !s64i
+// CIR: %[[ELEM_1:.*]] = cir.vec.extract %[[TMP_2]][%[[CONST_1]] : !s64i] : !cir.vector<4 x !s32i>
+// CIR: cir.store {{.*}} %[[ELEM_1]], %[[Y_ADDR]] : !s32i, !cir.ptr<!s32i>
+
+// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[B_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[X_ADDR:.*]] = alloca i32, i64 1, align 4
+// LLVM: %[[TMP_1_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[Y_ADDR:.*]] = alloca i32, i64 1, align 4
+// LLVM: %[[TMP_2_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// LLVM: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
+// LLVM: %[[ADD_A_B:.*]] = add <4 x i32> %[[TMP_A]], %[[TMP_B]]
+// LLVM: store <4 x i32> %[[ADD_A_B]], ptr %[[TMP_1_ADDR]], align 16
+// LLVM: %[[TMP_1:.*]] = load <4 x i32>, ptr %[[TMP_1_ADDR]], align 16
+// LLVM: %[[ELEM_0:.*]] = extractelement <4 x i32> %[[TMP_1]], i64 0
+// LLVM: store i32 %[[ELEM_0]], ptr %[[X_ADDR]], align 4
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// LLVM: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
+// LLVM: %[[ADD_A_B:.*]] = add <4 x i32> %[[TMP_A]], %[[TMP_B]]
+// LLVM: store <4 x i32> %[[ADD_A_B]], ptr %[[TMP_2_ADDR]], align 16
+// LLVM: %[[TMP_2:.*]] = load <4 x i32>, ptr %[[TMP_2_ADDR]], align 16
+// LLVM: %[[ELEM_1:.*]] = extractelement <4 x i32> %[[TMP_2]], i64 1
+// LLVM: store i32 %[[ELEM_1]], ptr %[[Y_ADDR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[B_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[X_ADDR:.*]] = alloca i32, align 4
+// OGCG: %[[TMP_1_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[Y_ADDR:.*]] = alloca i32, align 4
+// OGCG: %[[TMP_2_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
+// OGCG: %[[ADD_A_B:.*]] = add <4 x i32> %[[TMP_A]], %[[TMP_B]]
+// OGCG: store <4 x i32> %[[ADD_A_B]], ptr %[[TMP_1_ADDR]], align 16
+// OGCG: %[[TMP_1:.*]] = load <4 x i32>, ptr %[[TMP_1_ADDR]], align 16
+// OGCG: %[[ELEM_0:.*]] = extractelement <4 x i32> %[[TMP_1]], i64 0
+// OGCG: store i32 %[[ELEM_0]], ptr %[[X_ADDR]], align 4
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
+// OGCG: %[[ADD_A_B:.*]] = add <4 x i32> %[[TMP_A]], %[[TMP_B]]
+// OGCG: store <4 x i32> %[[ADD_A_B]], ptr %[[TMP_2_ADDR]], align 16
+// OGCG: %[[TMP_2:.*]] = load <4 x i32>, ptr %[[TMP_2_ADDR]], align 16
+// OGCG: %[[ELEM_1:.*]] = extractelement <4 x i32> %[[TMP_2]], i64 1
+// OGCG: store i32 %[[ELEM_1]], ptr %[[Y_ADDR]], align 4
+
+void element_expr_from_rvalue_with_vec_result() {
+  vi4 a;
+  vi4 b;
+  vi2 c = (a + b).xy;
+  vi4 d = (a + b).wzyx;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b"]
+// CIR: %[[C_ADDR:.*]] = cir.alloca !cir.vector<2 x !s32i>, !cir.ptr<!cir.vector<2 x !s32i>>, ["c", init]
+// CIR: %[[TMP_1_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["tmp"]
+// CIR: %[[D_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["d", init]
+// CIR: %[[TMP_2_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["tmp"]
+// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: %[[TMP_B:.*]] = cir.load {{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: %[[ADD_A_B:.*]] = cir.binop(add, %[[TMP_A]], %[[TMP_B]]) : !cir.vector<4 x !s32i>
+// CIR: cir.store {{.*}} %[[ADD_A_B]], %[[TMP_1_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
+// CIR: %[[TMP_1:.*]] = cir.load {{.*}} %[[TMP_1_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<4 x !s32i>
+// CIR: %[[C_VALUE:.*]] = cir.vec.shuffle(%[[TMP_1]], %[[POISON]] : !cir.vector<4 x !s32i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<2 x !s32i>
+// CIR: cir.store {{.*}} %[[C_VALUE]], %[[C_ADDR]] : !cir.vector<2 x !s32i>, !cir.ptr<!cir.vector<2 x !s32i>>
+// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: %[[TMP_B:.*]] = cir.load {{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: %[[ADD_A_B:.*]] = cir.binop(add, %[[TMP_A]], %[[TMP_B]]) : !cir.vector<4 x !s32i>
+// CIR: cir.store {{.*}} %[[ADD_A_B]], %[[TMP_2_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
+// CIR: %[[TMP_2:.*]] = cir.load {{.*}} %[[TMP_2_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<4 x !s32i>
+// CIR: %[[D_VALUE:.*]] = cir.vec.shuffle(%[[TMP_2]], %[[POISON]] : !cir.vector<4 x !s32i>) [#cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<1> : !s32i, #cir.int<0> : !s32i] : !cir.vector<4 x !s32i>
+// CIR: cir.store {{.*}} %[[D_VALUE]], %[[D_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[B_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[C_ADDR:.*]] = alloca <2 x i32>, i64 1, align 8
+// LLVM: %[[TMP_1_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[D_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[TMP_2_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// LLVM: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
+// LLVM: %[[ADD_A_B:.*]] = add <4 x i32> %[[TMP_A]], %[[TMP_B]]
+// LLVM: store <4 x i32> %[[ADD_A_B]], ptr %[[TMP_1_ADDR]], align 16
+// LLVM: %[[TMP_1:.*]] = load <4 x i32>, ptr %[[TMP_1_ADDR]], align 16
+// LLVM: %[[C_VALUE:.*]] = shufflevector <4 x i32> %[[TMP_1]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+// LLVM: store <2 x i32> %[[C_VALUE]], ptr %[[C_ADDR]], align 8
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// LLVM: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
+// LLVM: %[[ADD_A_B:.*]] = add <4 x i32> %[[TMP_A]], %[[TMP_B]]
+// LLVM: store <4 x i32> %[[ADD_A_B]], ptr %[[TMP_2_ADDR]], align 16
+// LLVM: %[[TMP_2:.*]] = load <4 x i32>, ptr %[[TMP_2_ADDR]], align 16
+// LLVM: %[[D_VALUE:.*]] = shufflevector <4 x i32> %[[TMP_2]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+// LLVM: store <4 x i32> %[[D_VALUE]], ptr %[[D_ADDR]], align 16
+
+// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[B_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[C_ADDR:.*]] = alloca <2 x i32>, align 8
+// OGCG: %[[TMP_1_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[D_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[TMP_2_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
+// OGCG: %[[ADD_A_B:.*]] = add <4 x i32> %[[TMP_A]], %[[TMP_B]]
+// OGCG: store <4 x i32> %[[ADD_A_B]], ptr %[[TMP_1_ADDR]], align 16
+// OGCG: %[[TMP_1:.*]] = load <4 x i32>, ptr %[[TMP_1_ADDR]], align 16
+// OGCG: %[[C_VALUE:.*]] = shufflevector <4 x i32> %[[TMP_1]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+// OGCG: store <2 x i32> %[[C_VALUE]], ptr %[[C_ADDR]], align 8
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
+// OGCG: %[[ADD_A_B:.*]] = add <4 x i32> %[[TMP_A]], %[[TMP_B]]
+// OGCG: store <4 x i32> %[[ADD_A_B]], ptr %[[TMP_2_ADDR]], align 16
+// OGCG: %[[TMP_2:.*]] = load <4 x i32>, ptr %[[TMP_2_ADDR]], align 16
+// OGCG: %[[D_VALUE:.*]] = shufflevector <4 x i32> %[[TMP_2]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+// OGCG: store <4 x i32> %[[D_VALUE]], ptr %[[D_ADDR]], align 16

andykaylor · 2025-11-17T19:45:24Z

clang/lib/CIR/CodeGen/CIRGenExpr.cpp

+    // Store the vector to memory (because LValue wants an address).
+    QualType baseTy = e->getBase()->getType();
+    Address vecMem = createMemTemp(baseTy, vec.getLoc(), "tmp");
+    builder.createStore(vec.getLoc(), vec, vecMem);


Classic codegen has some recently added code here to handle a vector of bools by zero-extending. See #123977

We should update this to handle that case.

I added NYI because the Vector of bool is not supported yet

I added NYI because the Vector of bool is not supported yet

I guess what we need here is to substitute the NYI skeleton for checking on isPackedVectorBoolType in place of isExtVectorBoolType, but if that needs more work I'm fine if that comes as a next PR.

github-actions · 2025-11-17T21:18:14Z

🐧 Linux x64 Test Results

112046 tests passed
4077 tests skipped

bcardosolopes

LGTM, minor suggestion in the comment

bcardosolopes · 2025-11-18T19:14:40Z

clang/lib/CIR/CodeGen/CIRGenExpr.cpp

+    // Store the vector to memory (because LValue wants an address).
+    QualType baseTy = e->getBase()->getType();
+    Address vecMem = createMemTemp(baseTy, vec.getLoc(), "tmp");
+    builder.createStore(vec.getLoc(), vec, vecMem);


I added NYI because the Vector of bool is not supported yet

I guess what we need here is to substitute the NYI skeleton for checking on isPackedVectorBoolType in place of isExtVectorBoolType, but if that needs more work I'm fine if that comes as a next PR.

[CIR] ExtVectorElementExpr with rvalue base

4a779bd

AmrDeveloper requested review from andykaylor, bcardosolopes, lanza and xlauko as code owners November 16, 2025 09:33

llvmbot added clang Clang issues not falling into any other category ClangIR Anything related to the ClangIR project labels Nov 16, 2025

andykaylor reviewed Nov 17, 2025

View reviewed changes

Add NYI for VecBoolType

affda35

bcardosolopes approved these changes Nov 18, 2025

View reviewed changes

andykaylor approved these changes Nov 18, 2025

View reviewed changes

Address code review comments

838aa24

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[CIR] ExtVectorElementExpr with rvalue base #168260

[CIR] ExtVectorElementExpr with rvalue base #168260

AmrDeveloper commented Nov 16, 2025

Uh oh!

llvmbot commented Nov 16, 2025 •

edited

Loading

Uh oh!

andykaylor Nov 17, 2025

Uh oh!

AmrDeveloper Nov 17, 2025

Uh oh!

bcardosolopes Nov 18, 2025

Uh oh!

github-actions bot commented Nov 17, 2025 •

edited

Loading

Uh oh!

bcardosolopes left a comment

Uh oh!

bcardosolopes Nov 18, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

[CIR] ExtVectorElementExpr with rvalue base #168260

Are you sure you want to change the base?

[CIR] ExtVectorElementExpr with rvalue base #168260

Conversation

AmrDeveloper commented Nov 16, 2025

Uh oh!

llvmbot commented Nov 16, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

andykaylor Nov 17, 2025

Choose a reason for hiding this comment

Uh oh!

AmrDeveloper Nov 17, 2025

Choose a reason for hiding this comment

Uh oh!

bcardosolopes Nov 18, 2025

Choose a reason for hiding this comment

Uh oh!

github-actions bot commented Nov 17, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

🐧 Linux x64 Test Results

Uh oh!

bcardosolopes left a comment

Choose a reason for hiding this comment

Uh oh!

bcardosolopes Nov 18, 2025

Choose a reason for hiding this comment

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

llvmbot commented Nov 16, 2025 •

edited

Loading

github-actions bot commented Nov 17, 2025 •

edited

Loading