diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 72e6bea244802..c71a3c7d5f42b 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1152,7 +1152,34 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
   case X86::BI__builtin_ia32_insertf64x2_256:
   case X86::BI__builtin_ia32_inserti64x2_256:
   case X86::BI__builtin_ia32_insertf64x2_512:
-  case X86::BI__builtin_ia32_inserti64x2_512:
+  case X86::BI__builtin_ia32_inserti64x2_512: {
+    unsigned dstNumElts = cast<cir::VectorType>(ops[0].getType()).getSize();
+    unsigned srcNumElts = cast<cir::VectorType>(ops[1].getType()).getSize();
+    unsigned subVectors = dstNumElts / srcNumElts;
+    assert(llvm::isPowerOf2_32(subVectors) && "Expected power of 2 subvectors");
+    assert(dstNumElts <= 16);
+
+    uint64_t index = getZExtIntValueFromConstOp(ops[2]);
+    index &= subVectors - 1; // Remove any extra bits.
+    index *= srcNumElts;
+
+    llvm::SmallVector<int64_t, 16> mask(dstNumElts);
+    for (unsigned i = 0; i != dstNumElts; ++i)
+      mask[i] = (i >= srcNumElts) ? srcNumElts + (i % srcNumElts) : i;
+
+    mlir::Value op1 =
+        builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[1], mask);
+
+    for (unsigned i = 0; i != dstNumElts; ++i) {
+      if (i >= index && i < (index + srcNumElts))
+        mask[i] = (i - index) + dstNumElts;
+      else
+        mask[i] = i;
+    }
+
+    return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], op1,
+                                    mask);
+  }
   case X86::BI__builtin_ia32_pmovqd512_mask:
   case X86::BI__builtin_ia32_pmovwb512_mask:
   case X86::BI__builtin_ia32_pblendw128:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx-builtins.c
index d9a8771023fa7..0de782701ddc4 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx-builtins.c
@@ -75,6 +75,51 @@ __m256i test_mm256_undefined_si256(void) {
   return _mm256_undefined_si256();
 }
 
+__m256d test_mm256_insertf128_pd(__m256d A, __m128d B) {
+  // CIR-LABEL: test_mm256_insertf128_pd
+  // %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<2 x !cir.double>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.double>
+  // %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<4 x !cir.double>) [#cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.double>
+
+  // LLVM-LABEL: test_mm256_insertf128_pd
+  // LLVM: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+
+  // OGCG-LABEL: test_mm256_insertf128_pd
+  // OGCG: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // OGCG: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  return _mm256_insertf128_pd(A, B, 0);
+}
+
+__m256 test_mm256_insertf128_ps(__m256 A, __m128 B) {
+  // CIR-LABEL: test_mm256_insertf128_ps
+  // %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<4 x !cir.float>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<8 x !cir.float>
+  // %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.float>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i] : !cir.vector<8 x !cir.float>
+
+  // LLVM-LABEL: test_mm256_insertf128_ps
+  // LLVM: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // LLVM: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+
+  // OGCG-LABEL: test_mm256_insertf128_ps
+  // OGCG: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // OGCG: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+  return _mm256_insertf128_ps(A, B, 1);
+}
+
+__m256i test_mm256_insertf128_si256(__m256i A, __m128i B) {
+  // CIR-LABEL: test_mm256_insertf128_si256
+  // %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<4 x !s32i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<8 x !s32i>
+  // %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s32i>) [#cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i]
+
+  // LLVM-LABEL: test_mm256_insertf128_si256
+  // LLVM: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // LLVM: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+
+  // OGCG-LABEL: test_mm256_insertf128_si256
+  // OGCG: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // OGCG: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+  return _mm256_insertf128_si256(A, B, 0);
+}
+
 __m256d test_mm256_shuffle_pd(__m256d A, __m256d B) {
   // CIR-LABEL: test_mm256_shuffle_pd
   // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<4 x !cir.double>) [#cir.int<0> : !s32i, #cir.int<4> : !s32i, #cir.int<2> : !s32i, #cir.int<6> : !s32i] : !cir.vector<4 x !cir.double>
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx-shuffle-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx-shuffle-builtins.c
new file mode 100644
index 0000000000000..8e30ca93ef6cd
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx-shuffle-builtins.c
@@ -0,0 +1,162 @@
+// REQUIRES: x86-registered-target
+
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -disable-O0-optnone -fclangir -emit-cir -o %t.cir | opt -S -passes=mem2reg
+// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -ffreestanding %s -triple=i386-unknown-linux -target-feature +avx -disable-O0-optnone -fclangir -emit-cir -o %t.cir | opt -S -passes=mem2reg
+// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -disable-O0-optnone -fclangir -emit-llvm -o %t.ll | opt -S -passes=mem2reg
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -ffreestanding %s -triple=i386-unknown-linux -target-feature +avx -disable-O0-optnone -fclangir -emit-llvm -o %t.ll | opt -S -passes=mem2reg
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -disable-O0-optnone -emit-llvm -o - | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -ffreestanding %s -triple=i386-unknown-linux -target-feature +avx -disable-O0-optnone -emit-llvm -o - | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=OGCG
+
+#include <immintrin.h>
+
+__m256d test0_mm256_insertf128_pd(__m256d a, __m128d b) {
+  // CIR-LABEL: @test0_mm256_insertf128_pd(
+  // CIR: [[A:%.*]] = cir.load align(32) %0 : !cir.ptr<!cir.vector<4 x !cir.double>>, !cir.vector<4 x !cir.double>
+  // CIR: [[B:%.*]] = cir.load align(16) %1 : !cir.ptr<!cir.vector<2 x !cir.double>>, !cir.vector<2 x !cir.double>
+  // CIR: %{{.*}} = cir.vec.shuffle([[B]], %{{.*}} : !cir.vector<2 x !cir.double>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.double>
+  // CIR-NEXT: %{{.*}} = cir.vec.shuffle([[A]], %{{.*}} : !s32i, #cir.int<5> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.double>
+  // CIR: cir.return %{{.*}} : !cir.vector<4 x !cir.double>
+
+  // LLVM-LABEL: @test0_mm256_insertf128_pd
+  // LLVM:    [[A:%.*]] = load <4 x double>, ptr %{{.*}}, align 32
+  // LLVM:    [[B:%.*]] = load <2 x double>, ptr %{{.*}}, align 16
+  // LLVM-NEXT:    [[WIDEN:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM-NEXT:    [[INSERT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[WIDEN]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  // LLVM:    ret <4 x double>
+
+  // OGCG-LABEL: define dso_local <4 x double> @test0_mm256_insertf128_pd(
+  // OGCG-SAME: <4 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+  // OGCG-NEXT:  [[ENTRY:.*:]]
+  // OGCG-NEXT:    [[WIDEN:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // OGCG-NEXT:    [[INSERT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[WIDEN]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  // OGCG-NEXT:    ret <4 x double> [[INSERT]]
+  return _mm256_insertf128_pd(a, b, 0);
+}
+
+__m256d test1_mm256_insertf128_pd(__m256d a, __m128d b) {
+  // CIR-LABEL: @test1_mm256_insertf128_pd(
+  // CIR: [[A:%.*]] = cir.load align(32) %0 : !cir.ptr<!cir.vector<4 x !cir.double>>, !cir.vector<4 x !cir.double>
+  // CIR: [[B:%.*]] = cir.load align(16) %1 : !cir.ptr<!cir.vector<2 x !cir.double>>, !cir.vector<2 x !cir.double>
+  // CIR: %{{.*}} = cir.vec.shuffle([[B]], %{{.*}} : !cir.vector<2 x !cir.double>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.double>
+  // CIR-NEXT: %{{.*}} = cir.vec.shuffle([[A]], %{{.*}} : !cir.vector<4 x !cir.double>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i] : !cir.vector<4 x !cir.double>
+  // CIR: cir.return %{{.*}} : !cir.vector<4 x !cir.double>
+
+  // LLVM-LABEL: @test1_mm256_insertf128_pd
+  // LLVM:    [[A:%.*]] = load <4 x double>, ptr %{{.*}}, align 32
+  // LLVM:    [[B:%.*]] = load <2 x double>, ptr %{{.*}}, align 16
+  // LLVM-NEXT:    [[WIDEN:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM-NEXT:    [[INSERT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[WIDEN]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  // LLVM:    ret <4 x double>
+
+  // OGCG-LABEL: define dso_local <4 x double> @test1_mm256_insertf128_pd(
+  // OGCG-SAME: <4 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+  // OGCG-NEXT:  [[ENTRY:.*:]]
+  // OGCG-NEXT:    [[WIDEN:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // OGCG-NEXT:    [[INSERT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[WIDEN]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  // OGCG-NEXT:    ret <4 x double> [[INSERT]]
+  return _mm256_insertf128_pd(a, b, 1);
+}
+
+__m256 test0_mm256_insertf128_ps(__m256 a, __m128 b) {
+  // CIR-LABEL: @test0_mm256_insertf128_ps(
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<4 x !cir.float>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<8 x !cir.float>
+  // CIR-NEXT: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.float>) [#cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<8 x !cir.float>
+  // CIR: cir.return %{{.*}} : !cir.vector<8 x !cir.float>
+
+  // LLVM-LABEL: @test0_mm256_insertf128_ps(
+  // LLVM:    %{{.*}} = shufflevector <4 x float> %{{.*}}, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // LLVM-NEXT:    %{{.*}} = shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+  // LLVM:    ret <8 x float> %{{.*}}
+
+  // OGCG-LABEL: define dso_local <8 x float> @test0_mm256_insertf128_ps(
+  // OGCG-SAME: <8 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+  // OGCG-NEXT:  [[ENTRY:.*:]]
+  // OGCG-NEXT:    [[WIDEN:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // OGCG-NEXT:    [[INSERT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[WIDEN]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+  // OGCG-NEXT:    ret <8 x float> [[INSERT]]
+  return _mm256_insertf128_ps(a, b, 0);
+}
+
+__m256 test1_mm256_insertf128_ps(__m256 a, __m128 b) {
+  // CIR-LABEL: @test1_mm256_insertf128_ps(
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<4 x !cir.float>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<8 x !cir.float>
+  // CIR-NEXT: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.float>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i] : !cir.vector<8 x !cir.float>
+  // CIR: cir.return %{{.*}} : !cir.vector<8 x !cir.float>
+
+  // LLVM-LABEL: define dso_local <8 x float> @test1_mm256_insertf128_ps(
+  // LLVM:    %{{.*}} = shufflevector <4 x float> %{{.*}}, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // LLVM-NEXT:    %{{.*}} = shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+  // LLVM:    ret <8 x float> %{{.*}}
+
+  // OGCG-LABEL: define dso_local <8 x float> @test1_mm256_insertf128_ps(
+  // OGCG-SAME: <8 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+  // OGCG-NEXT:  [[ENTRY:.*:]]
+  // OGCG-NEXT:    [[WIDEN:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // OGCG-NEXT:    [[INSERT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[WIDEN]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+  // OGCG-NEXT:    ret <8 x float> [[INSERT]]
+  return _mm256_insertf128_ps(a, b, 1);
+}
+
+__m256i test0_mm256_insertf128_si256(__m256i a, __m128i b) {
+  // CIR-LABEL: @test0_mm256_insertf128_si256(
+  // CIR: [[TMP0:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s64i> -> !cir.vector<8 x !s32i>
+  // CIR: [[TMP1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<4 x !s32i>
+  // CIR: %{{.*}} = cir.vec.shuffle([[TMP1]], %{{.*}} : !cir.vector<4 x !s32i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<8 x !s32i>
+  // CIR-NEXT: %{{.*}} = cir.vec.shuffle([[TMP0]], %{{.*}} : !cir.vector<8 x !s32i>) [#cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<8 x !s32i>
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s32i> -> !cir.vector<4 x !s64i>
+  // CIR: cir.return %{{.*}} : !cir.vector<4 x !s64i>
+
+  // LLVM-LABEL: @test0_mm256_insertf128_si256
+  // LLVM:    [[TMP0:%.*]] = bitcast <4 x i64> %{{.*}} to <8 x i32>
+  // LLVM:    [[TMP1:%.*]] = bitcast <2 x i64> %{{.*}} to <4 x i32>
+  // LLVM:    [[WIDEN:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // LLVM-NEXT:    [[INSERT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[WIDEN]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+  // LLVM:    [[TMP2:%.*]] = bitcast <8 x i32> [[INSERT]] to <4 x i64>
+  // LLVM:    ret <4 x i64> %{{.*}}
+
+  // OGCG-LABEL: define dso_local <4 x i64> @test0_mm256_insertf128_si256(
+  // OGCG-SAME: <4 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+  // OGCG-NEXT:  [[ENTRY:.*:]]
+  // OGCG-NEXT:    [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32>
+  // OGCG-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <4 x i32>
+  // OGCG-NEXT:    [[WIDEN:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // OGCG-NEXT:    [[INSERT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[WIDEN]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+  // OGCG-NEXT:    [[TMP2:%.*]] = bitcast <8 x i32> [[INSERT]] to <4 x i64>
+  // OGCG-NEXT:    ret <4 x i64> [[TMP2]]
+  return _mm256_insertf128_si256(a, b, 0);
+}
+
+__m256i test1_mm256_insertf128_si256(__m256i a, __m128i b) {
+  // CIR-LABEL: @test1_mm256_insertf128_si256(
+  // CIR: [[TMP0:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s64i> -> !cir.vector<8 x !s32i>
+  // CIR: [[TMP1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<4 x !s32i>
+  // CIR: %{{.*}} = cir.vec.shuffle([[TMP1]], %{{.*}} : !cir.vector<4 x !s32i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<8 x !s32i>
+  // CIR-NEXT: %{{.*}} = cir.vec.shuffle([[TMP0]], %{{.*}} : !cir.vector<8 x !s32i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i] : !cir.vector<8 x !s32i>
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s32i> -> !cir.vector<4 x !s64i>
+  // CIR: cir.return %{{.*}} : !cir.vector<4 x !s64i>
+
+  // LLVM-LABEL: @test1_mm256_insertf128_si256
+  // LLVM:    [[TMP0:%.*]] = bitcast <4 x i64> %{{.*}} to <8 x i32>
+  // LLVM:    [[TMP1:%.*]] = bitcast <2 x i64> %{{.*}} to <4 x i32>
+  // LLVM:    [[WIDEN:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // LLVM-NEXT:    [[INSERT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[WIDEN]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+  // LLVM:    [[TMP2:%.*]] = bitcast <8 x i32> [[INSERT]] to <4 x i64>
+  // LLVM:    ret <4 x i64> %{{.*}}
+
+  // OGCG-LABEL: define dso_local <4 x i64> @test1_mm256_insertf128_si256(
+  // OGCG-SAME: <4 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+  // OGCG-NEXT:  [[ENTRY:.*:]]
+  // OGCG-NEXT:    [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32>
+  // OGCG-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <4 x i32>
+  // OGCG-NEXT:    [[WIDEN:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // OGCG-NEXT:    [[INSERT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[WIDEN]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+  // OGCG-NEXT:    [[TMP2:%.*]] = bitcast <8 x i32> [[INSERT]] to <4 x i64>
+  // OGCG-NEXT:    ret <4 x i64> [[TMP2]]
+  return _mm256_insertf128_si256(a, b, 1);
+}
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c
index f27d6e2862f83..d4ed43ca1d26b 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c
@@ -28,6 +28,36 @@
 
 #include <immintrin.h>
 
+__m256i test0_mm256_inserti128_si256(__m256i a, __m128i b) {
+  // CIR-LABEL: test0_mm256_inserti128_si256
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<2 x !s64i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s64i>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<4 x !s64i>) [#cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s64i>
+
+  // LLVM-LABEL: test0_mm256_inserti128_si256
+  // LLVM: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+
+  // OGCG-LABEL: test0_mm256_inserti128_si256
+  // OGCG: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // OGCG: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  return _mm256_inserti128_si256(a, b, 0);
+}
+
+__m256i test1_mm256_inserti128_si256(__m256i a, __m128i b) {
+  // CIR-LABEL: test1_mm256_inserti128_si256
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<2 x !s64i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s64i>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<4 x !s64i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i] : !cir.vector<4 x !s64i>
+
+  // LLVM-LABEL: test1_mm256_inserti128_si256
+  // LLVM: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+
+  // OGCG-LABEL: test1_mm256_inserti128_si256
+  // OGCG: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // OGCG: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  return _mm256_inserti128_si256(a, b, 1);
+}
+
 __m256i test_mm256_shufflelo_epi16(__m256i a) {
   // CIR-LABEL: _mm256_shufflelo_epi16
   // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<16 x !s16i>) [#cir.int<3> : !s32i, #cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<11> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<9> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i] : !cir.vector<16 x !s16i>
@@ -72,7 +102,7 @@ __m256i test_mm256_mul_epu32(__m256i a, __m256i b) {
   // OGCG: and <4 x i64> %{{.*}}, splat (i64 4294967295)
   // OGCG: mul <4 x i64> %{{.*}}, %{{.*}}
 
-return _mm256_mul_epu32(a, b);
+  return _mm256_mul_epu32(a, b);
 }
 
 __m256i test_mm256_mul_epi32(__m256i a, __m256i b) {
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c
index 3475e186e0c8f..79156bed56fd0 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c
@@ -323,3 +323,51 @@ unsigned char test_ktestz_mask16_u8(__mmask16 A, __mmask16 B) {
   // OGCG: trunc i32 %[[RES]] to i8
   return _ktestz_mask16_u8(A, B);
 }
+
+__m512 test_mm512_insertf32x8(__m512 __A, __m256 __B) {
+  // CIR-LABEL: test_mm512_insertf32x8
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<16 x !cir.float>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<16> : !s32i, #cir.int<17> : !s32i, #cir.int<18> : !s32i, #cir.int<19> : !s32i, #cir.int<20> : !s32i, #cir.int<21> : !s32i, #cir.int<22> : !s32i, #cir.int<23> : !s32i] : !cir.vector<16 x !cir.float>
+
+  // LLVM-LABEL: test_mm512_insertf32x8
+  // LLVM: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+
+  // OGCG-LABEL: test_mm512_insertf32x8
+  // OGCG: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+  return _mm512_insertf32x8(__A, __B, 1);
+}
+
+__m512i test_mm512_inserti32x8(__m512i __A, __m256i __B) {
+  // CIR-LABEL: test_mm512_inserti32x8
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<16 x !s32i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<16> : !s32i, #cir.int<17> : !s32i, #cir.int<18> : !s32i, #cir.int<19> : !s32i, #cir.int<20> : !s32i, #cir.int<21> : !s32i, #cir.int<22> : !s32i, #cir.int<23> : !s32i] : !cir.vector<16 x !s32i>
+
+  // LLVM-LABEL: test_mm512_inserti32x8
+  // LLVM: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+
+  // OGCG-LABEL: test_mm512_inserti32x8
+  // OGCG: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+  return _mm512_inserti32x8(__A, __B, 1);
+}
+
+__m512d test_mm512_insertf64x2(__m512d __A, __m128d __B) {
+  // CIR-LABEL: test_mm512_insertf64x2
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.double>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i] : !cir.vector<8 x !cir.double>
+
+  // LLVM-LABEL: test_mm512_insertf64x2
+  // LLVM: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+
+  // OGCG-LABEL: test_mm512_insertf64x2
+  // OGCG: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+  return _mm512_insertf64x2(__A, __B, 3);
+}
+
+__m512i test_mm512_inserti64x2(__m512i __A, __m128i __B) {
+  // CIR-LABEL: test_mm512_inserti64x2
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s64i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<8 x !s64i>
+
+  // LLVM-LABEL: test_mm512_inserti64x2
+  // LLVM: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+
+  // OGCG-LABEL: test_mm512_inserti64x2
+  // OGCG: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+  return _mm512_inserti64x2(__A, __B, 1);
+}
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
index c7443c82546ae..4beec89d7b784 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
@@ -126,6 +126,54 @@ __m512d test_mm512_permute_pd(__m512d A) {
     return _mm512_permute_pd(A, 0x55);
 }
 
+__m512d test_mm512_insertf64x4(__m512d __A, __m256d __B) {
+  // CIR-LABEL: test_mm512_insertf64x4
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.double>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i] : !cir.vector<8 x !cir.double>
+
+  // LLVM-LABEL: test_mm512_insertf64x4
+  // LLVM: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+
+  // OGCG-LABEL: test_mm512_insertf64x4
+  // OGCG: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+  return _mm512_insertf64x4(__A, __B, 1);
+}
+
+__m512 test_mm512_insertf32x4(__m512 __A, __m128 __B) {
+  // CIR-LABEL: test_mm512_insertf32x4
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<16 x !cir.float>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<16> : !s32i, #cir.int<17> : !s32i, #cir.int<18> : !s32i, #cir.int<19> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i] : !cir.vector<16 x !cir.float>
+
+  // LLVM-LABEL: test_mm512_insertf32x4
+  // LLVM: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+
+  // OGCG-LABEL: test_mm512_insertf32x4
+  // OGCG: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  return _mm512_insertf32x4(__A, __B, 1);
+}
+
+__m512i test_mm512_inserti64x4(__m512i __A, __m256i __B) {
+  // CIR-LABEL: test_mm512_inserti64x4
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s64i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i] : !cir.vector<8 x !s64i>
+
+  // LLVM-LABEL: test_mm512_inserti64x4
+  // LLVM: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+
+  // OGCG-LABEL: test_mm512_inserti64x4
+  // OGCG: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+  return _mm512_inserti64x4(__A, __B, 1);
+}
+
+__m512i test_mm512_inserti32x4(__m512i __A, __m128i __B) {
+  // CIR-LABEL: test_mm512_inserti32x4
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<16 x !s32i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<16> : !s32i, #cir.int<17> : !s32i, #cir.int<18> : !s32i, #cir.int<19> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i] : !cir.vector<16 x !s32i>
+
+  // LLVM-LABEL: test_mm512_inserti32x4
+  // LLVM: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+
+  // OGCG-LABEL: test_mm512_inserti32x4
+  // OGCG: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  return _mm512_inserti32x4(__A, __B, 1);
+}
+
 __mmask16 test_mm512_kand(__mmask16 A, __mmask16 B) {
   // CIR-LABEL: _mm512_kand
   // CIR: cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c
index 9ba3e19d41566..140dcd05d5a38 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c
@@ -5,7 +5,6 @@
 // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
 // RUN: FileCheck --check-prefixes=OGCG --input-file=%t.ll %s
 
-
 #include <immintrin.h>
 
 __m128d test_mm_mmask_i64gather_pd(__m128d __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
@@ -200,6 +199,30 @@ __m256i test_mm256_mask_i32gather_epi32(__m256i __v1_old, __mmask8 __mask, __m25
   return _mm256_mmask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2); 
 }
 
+__m256 test_mm256_insertf32x4(__m256 __A, __m128 __B) {
+  // CIR-LABEL: test_mm256_insertf32x4
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.float>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i] : !cir.vector<8 x !cir.float>
+
+  // LLVM-LABEL: @test_mm256_insertf32x4
+  // LLVM: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+
+  // OGCG-LABEL: @test_mm256_insertf32x4
+  // OGCG: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+  return _mm256_insertf32x4(__A, __B, 1);
+}
+
+__m256i test_mm256_inserti32x4(__m256i __A, __m128i __B) {
+  // CIR-LABEL: test_mm256_inserti32x4
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s32i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i] : !cir.vector<8 x !s32i>
+
+  // LLVM-LABEL: @test_mm256_inserti32x4
+  // LLVM: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+
+  // OGCG-LABEL: @test_mm256_inserti32x4
+  // OGCG: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+  return _mm256_inserti32x4(__A, __B, 1);
+}
+
 __m128d test_mm_mask_expand_pd(__m128d __W, __mmask8 __U, __m128d __A) {
   // CIR-LABEL: _mm_mask_expand_pd
   // CIR: %[[MASK:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
@@ -231,4 +254,3 @@ __m128d test_mm_maskz_expand_pd(__mmask8 __U, __m128d __A) {
 
   return _mm_maskz_expand_pd(__U,__A);
 }
-
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vldq-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vldq-builtins.c
new file mode 100644
index 0000000000000..6eb1028275477
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vldq-builtins.c
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512dq -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512dq -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512dq -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+
+#include <immintrin.h>
+
+__m256d test_mm256_insertf64x2(__m256d __A, __m128d __B) {
+  // CIR-LABEL: test_mm256_insertf64x2
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<4 x !cir.double>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i] : !cir.vector<4 x !cir.double>
+
+  // LLVM-LABEL: @test_mm256_insertf64x2
+  // LLVM: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+
+  // OGCG-LABEL: @test_mm256_insertf64x2
+  // OGCG: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  return _mm256_insertf64x2(__A, __B, 1);
+}
+
+__m256i test_mm256_inserti64x2(__m256i __A, __m128i __B) {
+  // CIR-LABEL: test_mm256_inserti64x2
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<4 x !s64i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i] : !cir.vector<4 x !s64i>
+
+  // LLVM-LABEL: @test_mm256_inserti64x2
+  // LLVM: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+
+  // OGCG-LABEL: @test_mm256_inserti64x2
+  // OGCG: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  return _mm256_inserti64x2(__A, __B, 1);
+}