Commit 56f3653

AmrDeveloper authored and lanza committed
[CIR][CIRGen][Builtin][Neon] Lower neon_vstl1_lane_s64 and vstl1q_lane_s64 (llvm#1340)
Lower `neon_vstl1_lane_s64` and `vstl1q_lane_s64`
1 parent ddb6da7 commit 56f3653
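
The `vstl1_lane`/`vstl1q_lane` intrinsics are the release-ordered variants of the plain `vst1_lane` stores: they extract one lane from the vector operand and store it through the pointer with release memory ordering. A minimal C sketch of the intended semantics, assuming a Clang or GCC toolchain with arm_neon.h; the helper name vstl1q_lane_s64_equiv is hypothetical, for illustration only:

#include <arm_neon.h>
#include <stdint.h>

// Hypothetical illustration of what vstl1q_lane_s64(a, b, 1) amounts to:
// extract lane 1 of the two-element vector, then store it with release
// ordering (matching the `store atomic i64 ... release` in the tests below).
void vstl1q_lane_s64_equiv(int64_t *a, int64x2_t b) {
  int64_t lane = vgetq_lane_s64(b, 1);
  __atomic_store_n(a, lane, __ATOMIC_RELEASE);
}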

File tree: 2 files changed, +134 -1 lines

clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp

Lines changed: 6 additions & 1 deletion
@@ -4473,7 +4473,12 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
   }
   case NEON::BI__builtin_neon_vstl1_lane_s64:
   case NEON::BI__builtin_neon_vstl1q_lane_s64: {
-    llvm_unreachable("NEON::BI__builtin_neon_vstl1q_lane_s64 NYI");
+    Ops[1] = builder.createBitcast(Ops[1], ty);
+    Ops[1] = builder.create<cir::VecExtractOp>(Ops[1].getLoc(), Ops[1], Ops[2]);
+    cir::StoreOp Store = builder.createAlignedStore(
+        getLoc(E->getExprLoc()), Ops[1], Ops[0], PtrOp0.getAlignment());
+    Store.setAtomic(cir::MemOrder::Release);
+    return Ops[1];
   }
   case NEON::BI__builtin_neon_vld2_v:
   case NEON::BI__builtin_neon_vld2q_v: {
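
The lowering is: bitcast Ops[1] to the vector type `ty`, extract the lane selected by Ops[2] with `cir::VecExtractOp`, emit an aligned `cir.store` of that scalar to the pointer in Ops[0], and mark the store atomic with release ordering via `setAtomic(cir::MemOrder::Release)`. The back-to-back bitcasts in the LLVM checks below appear to come from the generic NEON argument handling, which shuttles vector operands through byte-vector types.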

clang/test/CIR/CodeGen/AArch64/neon-ldst.c

Lines changed: 128 additions & 0 deletions
@@ -502,3 +502,131 @@ void test_vst1q_lane_f64(float64_t * ptr, float64x2_t src) {
   // LLVM: [[VEC_CAST1:%.*]] = bitcast <16 x i8> [[VEC_CAST0]] to <2 x double>
   // LLVM: [[RES:%.*]] = extractelement <2 x double> [[VEC_CAST1]], i32 1
   // LLVM: store double [[RES]], ptr [[PTR]], align 8
+
+void test_vstl1q_lane_u64(uint64_t *a, uint64x2_t b) {
+  vstl1q_lane_u64(a, b, 1);
+}
+
+// CIR-LABEL: test_vstl1q_lane_u64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!u64i x 2>
+// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr<!void>), !cir.ptr<!u64i>
+// CIR: cir.store align(8) atomic(release) [[VAL]], [[PTR]] : !u64i, !cir.ptr<!u64i>
+
+// LLVM: {{.*}}test_vstl1q_lane_u64(ptr{{.*}}[[PTR:%.*]], <2 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[SRC]] to <16 x i8>
+// LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// LLVM: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+// LLVM: store atomic i64 [[TMP2]], ptr [[PTR]] release, align 8
+
+void test_vstl1q_lane_s64(int64_t *a, int64x2_t b) {
+  vstl1q_lane_s64(a, b, 1);
+}
+
+// CIR-LABEL: test_vstl1q_lane_s64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!s64i x 2>
+// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr<!void>), !cir.ptr<!s64i>
+// CIR: cir.store align(8) atomic(release) [[VAL]], [[PTR]] : !s64i, !cir.ptr<!s64i>
+
+// LLVM: {{.*}}test_vstl1q_lane_s64(ptr{{.*}}[[PTR:%.*]], <2 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[SRC]] to <16 x i8>
+// LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// LLVM: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+// LLVM: store atomic i64 [[TMP2]], ptr [[PTR]] release, align 8
+
+void test_vstl1q_lane_f64(float64_t *a, float64x2_t b) {
+  vstl1q_lane_f64(a, b, 1);
+}
+
+// CIR-LABEL: test_vstl1q_lane_f64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!cir.double x 2>
+// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr<!void>), !cir.ptr<!cir.double>
+// CIR: cir.store align(8) atomic(release) [[VAL]], [[PTR]] : !cir.double, !cir.ptr<!cir.double>
+
+// LLVM: {{.*}}test_vstl1q_lane_f64(ptr{{.*}}[[PTR:%.*]], <2 x double>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <2 x double> [[SRC]] to <16 x i8>
+// LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// LLVM: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
+// LLVM: store atomic double [[TMP2]], ptr [[PTR]] release, align 8
+
+void test_vstl1q_lane_p64(poly64_t *a, poly64x2_t b) {
+  vstl1q_lane_p64(a, b, 1);
+}
+
+// CIR-LABEL: test_vstl1q_lane_p64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!s64i x 2>
+// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr<!void>), !cir.ptr<!s64i>
+// CIR: cir.store align(8) atomic(release) [[VAL]], [[PTR]] : !s64i, !cir.ptr<!s64i>
+
+// LLVM: {{.*}}test_vstl1q_lane_p64(ptr{{.*}}[[PTR:%.*]], <2 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[SRC]] to <16 x i8>
+// LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// LLVM: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+// LLVM: store atomic i64 [[TMP2]], ptr [[PTR]] release, align 8
+
+void test_vstl1_lane_u64(uint64_t *a, uint64x1_t b) {
+  vstl1_lane_u64(a, b, 0);
+}
+
+// CIR-LABEL: test_vstl1_lane_u64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!u64i x 1>
+// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr<!void>), !cir.ptr<!u64i>
+// CIR: cir.store align(8) atomic(release) [[VAL]], [[PTR]] : !u64i, !cir.ptr<!u64i>
+
+// LLVM: {{.*}}test_vstl1_lane_u64(ptr{{.*}}[[PTR:%.*]], <1 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[SRC]] to <8 x i8>
+// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// LLVM: [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
+// LLVM: store atomic i64 [[TMP2]], ptr [[PTR]] release, align 8
+
+void test_vstl1_lane_s64(int64_t *a, int64x1_t b) {
+  vstl1_lane_s64(a, b, 0);
+}
+
+// CIR-LABEL: test_vstl1_lane_s64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!s64i x 1>
+// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr<!void>), !cir.ptr<!s64i>
+// CIR: cir.store align(8) atomic(release) [[VAL]], [[PTR]] : !s64i, !cir.ptr<!s64i>
+
+// LLVM: {{.*}}test_vstl1_lane_s64(ptr{{.*}}[[PTR:%.*]], <1 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[SRC]] to <8 x i8>
+// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// LLVM: [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
+// LLVM: store atomic i64 [[TMP2]], ptr [[PTR]] release, align 8
+
+void test_vstl1_lane_f64(float64_t *a, float64x1_t b) {
+  vstl1_lane_f64(a, b, 0);
+}
+
+// CIR-LABEL: test_vstl1_lane_f64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!cir.double x 1>
+// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr<!void>), !cir.ptr<!cir.double>
+// CIR: cir.store align(8) atomic(release) [[VAL]], [[PTR]] : !cir.double, !cir.ptr<!cir.double>
+
+// LLVM: {{.*}}test_vstl1_lane_f64(ptr{{.*}}[[PTR:%.*]], <1 x double>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <1 x double> [[SRC]] to <8 x i8>
+// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
+// LLVM: [[TMP2:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
+// LLVM: store atomic double [[TMP2]], ptr [[PTR]] release, align 8
+
+void test_vstl1_lane_p64(poly64_t *a, poly64x1_t b) {
+  vstl1_lane_p64(a, b, 0);
+}
+
+// CIR-LABEL: test_vstl1_lane_p64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!s64i x 1>
+// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr<!void>), !cir.ptr<!s64i>
+// CIR: cir.store align(8) atomic(release) [[VAL]], [[PTR]] : !s64i, !cir.ptr<!s64i>
+
+// LLVM: {{.*}}test_vstl1_lane_p64(ptr{{.*}}[[PTR:%.*]], <1 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[SRC]] to <8 x i8>
+// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// LLVM: [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
+// LLVM: store atomic i64 [[TMP2]], ptr [[PTR]] release, align 8
