
Commit 8944243

AmrDeveloper authored and lanza committed
[CIR][CIRGen][Builtin][Neon] Lower neon_vldap1_lane_s64 and vldap1q_lane_s64 (llvm#1346)
Lower `neon_vldap1_lane_s64` and `vldap1q_lane_s64`. To add the atomic `MemOrder`, I changed the builder's return type to `LoadOp`, similar to our builders for `StoreOp`.
1 parent 6737949 commit 8944243
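For context, these intrinsics are ACLE load-acquire-to-lane operations: they perform an acquire load of a single 64-bit element and insert it into one lane of a vector, leaving the other lanes untouched. Below is a rough C model of the quad-word s64 case, matching the behavior the new tests check; the helper name vldap1q_lane_s64_model and the use of the Clang/GCC __atomic_load_n builtin are illustrative, not part of this change.

#include <arm_neon.h>
#include <stdint.h>

// Illustrative model of vldap1q_lane_s64(ptr, vec, 1): acquire-load one
// 64-bit element and insert it into lane 1, preserving the other lane.
static inline int64x2_t vldap1q_lane_s64_model(const int64_t *ptr,
                                               int64x2_t vec) {
  int64_t elt = __atomic_load_n(ptr, __ATOMIC_ACQUIRE); // load atomic ... acquire
  return vsetq_lane_s64(elt, vec, 1);                   // insert into lane 1
}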

3 files changed: +146 −4 lines changed


clang/lib/CIR/CodeGen/CIRGenBuilder.h

Lines changed: 3 additions & 3 deletions
@@ -871,7 +871,7 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
         /*mem_order=*/cir::MemOrderAttr{}, /*tbaa=*/cir::TBAAAttr{});
   }
 
-  mlir::Value createAlignedLoad(mlir::Location loc, mlir::Type ty,
+  cir::LoadOp createAlignedLoad(mlir::Location loc, mlir::Type ty,
                                 mlir::Value ptr, llvm::MaybeAlign align,
                                 bool isVolatile) {
     if (ty != mlir::cast<cir::PointerType>(ptr.getType()).getPointee())
@@ -880,14 +880,14 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
     return CIRBaseBuilderTy::createLoad(loc, ptr, isVolatile, alignment);
   }
 
-  mlir::Value createAlignedLoad(mlir::Location loc, mlir::Type ty,
+  cir::LoadOp createAlignedLoad(mlir::Location loc, mlir::Type ty,
                                 mlir::Value ptr, llvm::MaybeAlign align) {
     // TODO: make sure callsites shouldn't be really passing volatile.
     assert(!cir::MissingFeatures::volatileLoadOrStore());
     return createAlignedLoad(loc, ty, ptr, align, /*isVolatile=*/false);
   }
 
-  mlir::Value
+  cir::LoadOp
   createAlignedLoad(mlir::Location loc, mlir::Type ty, mlir::Value addr,
                     clang::CharUnits align = clang::CharUnits::One()) {
     return createAlignedLoad(loc, ty, addr, align.getAsAlign());

clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp

Lines changed: 6 additions & 1 deletion
@@ -4453,7 +4453,12 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
   }
   case NEON::BI__builtin_neon_vldap1_lane_s64:
   case NEON::BI__builtin_neon_vldap1q_lane_s64: {
-    llvm_unreachable("NEON::BI__builtin_neon_vldap1q_lane_s64 NYI");
+    cir::LoadOp Load = builder.createAlignedLoad(
+        Ops[0].getLoc(), vTy.getEltType(), Ops[0], PtrOp0.getAlignment());
+    Load.setAtomic(cir::MemOrder::Acquire);
+    return builder.create<cir::VecInsertOp>(getLoc(E->getExprLoc()),
+                                            builder.createBitcast(Ops[1], vTy),
+                                            Load, Ops[2]);
   }
   case NEON::BI__builtin_neon_vld1_dup_v:
   case NEON::BI__builtin_neon_vld1q_dup_v: {

clang/test/CIR/CodeGen/AArch64/neon-ldst.c

Lines changed: 137 additions & 0 deletions
@@ -630,3 +630,140 @@ void test_vstl1_lane_p64(poly64_t *a, poly64x1_t b) {
 // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
 // LLVM: [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
 // LLVM: store atomic i64 [[TMP2]], ptr [[PTR]] release, align 8
+
+uint64x2_t test_vldap1q_lane_u64(uint64_t *a, uint64x2_t b) {
+  return vldap1q_lane_u64(a, b, 1);
+}
+
+// CIR-LABEL:test_vldap1q_lane_u64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[TMP0:%.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!u64i>
+// CIR: [[VAL:%.*]] = cir.load align(8) atomic(acquire) [[TMP0]] : !cir.ptr<!u64i>, !u64
+// CIR: [[VEC:%.*]] = cir.cast(bitcast, {{.*}} : !cir.vector<!s8i x 16>), !cir.vector<!u64i x 2>
+// CIR: [[TMP:%.*]] = cir.vec.insert [[VAL]], {{.*}}[[[LANE]] : !s32i] : !cir.vector<!u64i x 2>
+
+// LLVM: {{.*}}test_vldap1q_lane_u64(ptr{{.*}}[[PTR:%.*]], <2 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[SRC]] to <16 x i8>
+// LLVM: [[TMP2:%.*]] = load atomic i64, ptr [[PTR]] acquire, align 8
+// LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// LLVM: [[VLDAP1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1
+
+int64x2_t test_vldap1q_lane_s64(int64_t *a, int64x2_t b) {
+  return vldap1q_lane_s64(a, b, 1);
+}
+
+// CIR-LABEL:test_vldap1q_lane_s64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[TMP0:%.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!s64i>
+// CIR: [[VAL:%.*]] = cir.load align(8) atomic(acquire) [[TMP0]] : !cir.ptr<!s64i>, !s64
+// CIR: [[VEC:%.*]] = cir.cast(bitcast, {{.*}} : !cir.vector<!s8i x 16>), !cir.vector<!s64i x 2>
+// CIR: [[TMP:%.*]] = cir.vec.insert [[VAL]], {{.*}}[[[LANE]] : !s32i] : !cir.vector<!s64i x 2>
+
+// LLVM: {{.*}}test_vldap1q_lane_s64(ptr{{.*}}[[PTR:%.*]], <2 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[SRC]] to <16 x i8>
+// LLVM: [[TMP2:%.*]] = load atomic i64, ptr [[PTR]] acquire, align 8
+// LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// LLVM: [[VLDAP1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1
+
+float64x2_t test_vldap1q_lane_f64(float64_t *a, float64x2_t b) {
+  return vldap1q_lane_f64(a, b, 1);
+}
+
+// CIR-LABEL:test_vldap1q_lane_f64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[TMP0:%.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!cir.double>
+// CIR: [[VAL:%.*]] = cir.load align(8) atomic(acquire) [[TMP0]] : !cir.ptr<!cir.double>, !cir.double
+// CIR: [[VEC:%.*]] = cir.cast(bitcast, {{.*}} : !cir.vector<!s8i x 16>), !cir.vector<!cir.double x 2>
+// CIR: [[TMP:%.*]] = cir.vec.insert [[VAL]], {{.*}}[[[LANE]] : !s32i] : !cir.vector<!cir.double x 2>
+
+// LLVM: {{.*}}test_vldap1q_lane_f64(ptr{{.*}}[[PTR:%.*]], <2 x double>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <2 x double> [[SRC]] to <16 x i8>
+// LLVM: [[TMP2:%.*]] = load atomic double, ptr [[PTR]] acquire, align 8
+// LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// LLVM: [[VLDAP1_LANE:%.*]] = insertelement <2 x double> [[TMP1]], double [[TMP2]], i32 1
+
+poly64x2_t test_vldap1q_lane_p64(poly64_t *a, poly64x2_t b) {
+  return vldap1q_lane_p64(a, b, 1);
+}
+
+// CIR-LABEL:test_vldap1q_lane_p64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[TMP0:%.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!s64i>
+// CIR: [[VAL:%.*]] = cir.load align(8) atomic(acquire) [[TMP0]] : !cir.ptr<!s64i>, !s64
+// CIR: [[VEC:%.*]] = cir.cast(bitcast, {{.*}} : !cir.vector<!s8i x 16>), !cir.vector<!s64i x 2>
+// CIR: [[TMP:%.*]] = cir.vec.insert [[VAL]], {{.*}}[[[LANE]] : !s32i] : !cir.vector<!s64i x 2>
+
+// LLVM: {{.*}}test_vldap1q_lane_p64(ptr{{.*}}[[PTR:%.*]], <2 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[SRC]] to <16 x i8>
+// LLVM: [[TMP2:%.*]] = load atomic i64, ptr [[PTR]] acquire, align 8
+// LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// LLVM: [[VLDAP1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1
+
+uint64x1_t test_vldap1_lane_u64(uint64_t *a, uint64x1_t b) {
+  return vldap1_lane_u64(a, b, 0);
+}
+
+// CIR-LABEL:test_vldap1_lane_u64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: [[TMP0:%.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!u64i>
+// CIR: [[VAL:%.*]] = cir.load align(8) atomic(acquire) [[TMP0]] : !cir.ptr<!u64i>, !u64
+// CIR: [[VEC:%.*]] = cir.cast(bitcast, {{.*}} : !cir.vector<!s8i x 8>), !cir.vector<!u64i x 1>
+// CIR: [[TMP:%.*]] = cir.vec.insert [[VAL]], {{.*}}[[[LANE]] : !s32i] : !cir.vector<!u64i x 1>
+
+// LLVM: {{.*}}test_vldap1_lane_u64(ptr{{.*}}[[PTR:%.*]], <1 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[SRC]] to <8 x i8>
+// LLVM: [[TMP2:%.*]] = load atomic i64, ptr [[PTR]] acquire, align 8
+// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// LLVM: [[VLDAP1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0
+
+int64x1_t test_vldap1_lane_s64(int64_t *a, int64x1_t b) {
+  return vldap1_lane_s64(a, b, 0);
+}
+
+// CIR-LABEL:test_vldap1_lane_s64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: [[TMP0:%.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!s64i>
+// CIR: [[VAL:%.*]] = cir.load align(8) atomic(acquire) [[TMP0]] : !cir.ptr<!s64i>, !s64
+// CIR: [[VEC:%.*]] = cir.cast(bitcast, {{.*}} : !cir.vector<!s8i x 8>), !cir.vector<!s64i x 1>
+// CIR: [[TMP:%.*]] = cir.vec.insert [[VAL]], {{.*}}[[[LANE]] : !s32i] : !cir.vector<!s64i x 1>
+
+// LLVM: {{.*}}test_vldap1_lane_s64(ptr{{.*}}[[PTR:%.*]], <1 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[SRC]] to <8 x i8>
+// LLVM: [[TMP2:%.*]] = load atomic i64, ptr [[PTR]] acquire, align 8
+// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// LLVM: [[VLDAP1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0
+
+
+float64x1_t test_vldap1_lane_f64(float64_t *a, float64x1_t b) {
+  return vldap1_lane_f64(a, b, 0);
+}
+
+// CIR-LABEL: test_vldap1_lane_f64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: [[TMP0:%.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!cir.double>
+// CIR: [[VAL:%.*]] = cir.load align(8) atomic(acquire) [[TMP0]] : !cir.ptr<!cir.double>, !cir.double
+// CIR: [[VEC:%.*]] = cir.cast(bitcast, {{.*}} : !cir.vector<!s8i x 8>), !cir.vector<!cir.double x 1>
+// CIR: [[TMP:%.*]] = cir.vec.insert [[VAL]], {{.*}}[[[LANE]] : !s32i] : !cir.vector<!cir.double x 1>
+
+// LLVM: {{.*}}test_vldap1_lane_f64(ptr{{.*}}[[PTR:%.*]], <1 x double>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <1 x double> [[SRC]] to <8 x i8>
+// LLVM: [[TMP2:%.*]] = load atomic double, ptr [[PTR]] acquire, align 8
+// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
+// LLVM: [[VLDAP1_LANE:%.*]] = insertelement <1 x double> [[TMP1]], double [[TMP2]], i32 0
+
+poly64x1_t test_vldap1_lane_p64(poly64_t *a, poly64x1_t b) {
+  return vldap1_lane_p64(a, b, 0);
+}
+
+// CIR-LABEL: test_vldap1_lane_p64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: [[TMP0:%.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!s64i>
+// CIR: [[VAL:%.*]] = cir.load align(8) atomic(acquire) [[TMP0]] : !cir.ptr<!s64i>, !s64
+// CIR: [[VEC:%.*]] = cir.cast(bitcast, {{.*}} : !cir.vector<!s8i x 8>), !cir.vector<!s64i x 1>
+// CIR: [[TMP:%.*]] = cir.vec.insert [[VAL]], {{.*}}[[[LANE]] : !s32i] : !cir.vector<!s64i x 1>
+
+// LLVM: {{.*}}test_vldap1_lane_p64(ptr{{.*}}[[PTR:%.*]], <1 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[SRC]] to <8 x i8>
+// LLVM: [[TMP2:%.*]] = load atomic i64, ptr [[PTR]] acquire, align 8
+// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// LLVM: [[VLDAP1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0
