diff --git a/benchmarks/progress64/README.md b/benchmarks/progress64/README.md new file mode 100644 index 0000000000..807be82df0 --- /dev/null +++ b/benchmarks/progress64/README.md @@ -0,0 +1,21 @@ +# How to generate LLVM files with Inline Asm with Progress64 + +You need to clone [ARM-software/progress64](https://github.com/ARM-software/progress64). + +In this case you have to generate the clients to leverage the API on your own. + +Then, follow this pattern : +``` +clang -S -emit-llvm +``` +The Includes should contain : +1. path to progress64/include +2. path to progress64/src + +The Custom flags should be set up accordingly to your client. In some cases, if you do not get inline asm, try to add : +1. ```-U__ARM_FEATURE_ATOMICS``` to force the compiler to avoid including the builtins + +A valid example would therefore be, from root of progress64 : +``` +clang -I /include -I /src -S -emit-llvm benchmarks/progress64/rwlock.c +``` \ No newline at end of file diff --git a/benchmarks/progress64/rwlock.c b/benchmarks/progress64/rwlock.c new file mode 100644 index 0000000000..46b34ec54b --- /dev/null +++ b/benchmarks/progress64/rwlock.c @@ -0,0 +1,34 @@ +#include +#include +#include "p64_rwlock.c" + +#define NTHREADS 3 + + +void *run(void *arg) { + p64_rwlock_acquire_wr(&lock); + x++; + y++; + p64_rwlock_release_wr(&lock); + return NULL; +} + +int main() { + pthread_t threads[NTHREADS]; + p64_rwlock_init(&lock); + + for (int i = 0; i < NTHREADS; i++) { + if (pthread_create(&threads[i], NULL, run, (void *)(long)i) != 0) { + exit(EXIT_FAILURE); + } + } + + for (int i = 0; i < NTHREADS; i++) { + if (pthread_join(threads[i], NULL) != 0) { + exit(EXIT_FAILURE); + } + } + + assert(x == NTHREADS && y == NTHREADS); + return 0; +} diff --git a/benchmarks/progress64/seqlock.c b/benchmarks/progress64/seqlock.c new file mode 100644 index 0000000000..bf7e132d62 --- /dev/null +++ b/benchmarks/progress64/seqlock.c @@ -0,0 +1,57 @@ +#include +#include +#include "p64_rwsync.c" + +typedef struct { + int x; + int y; +} shared_data_t; + +p64_rwsync_t sync; +shared_data_t data; + +void *writer(void *arg) { + p64_rwsync_acquire_wr(&sync); + data.x++; + data.y++; + p64_rwsync_release_wr(&sync); + return NULL; +} + +void *reader(void *arg) { + shared_data_t local; + p64_rwsync_read(&sync, &local, &data, sizeof(local)); + return NULL; +} + +int main(void) { + pthread_t writers[2], readers[2]; + p64_rwsync_init(&sync); + data.x = 0; + data.y = 0; + + for (int i = 0; i < 2; i++) { + if (pthread_create(&writers[i], NULL, writer, NULL) != 0) { + exit(EXIT_FAILURE); + } + } + for (int i = 0; i < 2; i++) { + if (pthread_create(&readers[i], NULL, reader, NULL) != 0) { + exit(EXIT_FAILURE); + } + } + + for (int i = 0; i < 2; i++) { + if (pthread_join(writers[i], NULL) != 0) { + exit(EXIT_FAILURE); + } + } + + for (int i = 0; i < 2; i++) { + if (pthread_join(readers[i], NULL) != 0) { + exit(EXIT_FAILURE); + } + } + assert(data.x == 2 && data.y == 2); + return 0; +} \ No newline at end of file diff --git a/dartagnan/src/test/java/com/dat3m/dartagnan/asm/armv8/progress64/AsmProgress64Armv8Test.java b/dartagnan/src/test/java/com/dat3m/dartagnan/asm/armv8/progress64/AsmProgress64Armv8Test.java new file mode 100644 index 0000000000..cc96ab1171 --- /dev/null +++ b/dartagnan/src/test/java/com/dat3m/dartagnan/asm/armv8/progress64/AsmProgress64Armv8Test.java @@ -0,0 +1,89 @@ +package com.dat3m.dartagnan.asm.armv8.progress64; + +import java.io.File; +import java.io.IOException; +import java.util.Arrays; +import java.util.EnumSet; + +import static org.junit.Assert.assertEquals; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.sosy_lab.common.ShutdownManager; +import org.sosy_lab.common.configuration.Configuration; +import org.sosy_lab.common.configuration.InvalidConfigurationException; +import org.sosy_lab.common.log.BasicLogManager; +import org.sosy_lab.java_smt.SolverContextFactory; +import org.sosy_lab.java_smt.api.SolverContext; + +import com.dat3m.dartagnan.configuration.Arch; +import static com.dat3m.dartagnan.configuration.Property.PROGRAM_SPEC; +import static com.dat3m.dartagnan.configuration.Property.TERMINATION; +import com.dat3m.dartagnan.encoding.ProverWithTracker; +import com.dat3m.dartagnan.parsers.cat.ParserCat; +import com.dat3m.dartagnan.parsers.program.ProgramParser; +import com.dat3m.dartagnan.program.Program; +import static com.dat3m.dartagnan.utils.ResourceHelper.getRootPath; +import static com.dat3m.dartagnan.utils.ResourceHelper.getTestResourcePath; +import com.dat3m.dartagnan.utils.Result; +import static com.dat3m.dartagnan.utils.Result.PASS; +import com.dat3m.dartagnan.verification.VerificationTask; +import com.dat3m.dartagnan.verification.solving.AssumeSolver; +import com.dat3m.dartagnan.verification.solving.RefinementSolver; +import com.dat3m.dartagnan.wmm.Wmm; + +@RunWith(Parameterized.class) +public class AsmProgress64Armv8Test { + + private final String modelPath = getRootPath("cat/aarch64.cat"); + private final String programPath; + private final int bound; + private final Result expected; + + public AsmProgress64Armv8Test (String file, int bound, Result expected) { + this.programPath = getTestResourcePath("asm/armv8/progress64/" + file + ".ll"); + this.bound = bound; + this.expected = expected; + } + + @Parameterized.Parameters(name = "{index}: {0}, {1}, {2}") + public static Iterable data() throws IOException { + return Arrays.asList(new Object[][]{ + {"rwlock", 1, PASS}, + {"seqlock", 3, PASS}, + }); + } + + @Test + public void testAllSolvers() throws Exception { + try (SolverContext ctx = mkCtx(); ProverWithTracker prover = mkProver(ctx)) { + assertEquals(expected, RefinementSolver.run(ctx, prover, mkTask()).getResult()); + } + try (SolverContext ctx = mkCtx(); ProverWithTracker prover = mkProver(ctx)) { + assertEquals(expected, AssumeSolver.run(ctx, prover, mkTask()).getResult()); + } + } + + private SolverContext mkCtx() throws InvalidConfigurationException { + Configuration cfg = Configuration.builder().build(); + return SolverContextFactory.createSolverContext( + cfg, + BasicLogManager.create(cfg), + ShutdownManager.create().getNotifier(), + SolverContextFactory.Solvers.YICES2); + } + + private ProverWithTracker mkProver(SolverContext ctx) { + return new ProverWithTracker(ctx, "", SolverContext.ProverOptions.GENERATE_MODELS); + } + + private VerificationTask mkTask() throws Exception { + VerificationTask.VerificationTaskBuilder builder = VerificationTask.builder() + .withConfig(Configuration.builder().build()) + .withBound(bound) + .withTarget(Arch.ARM8); + Program program = new ProgramParser().parse(new File(programPath)); + Wmm mcm = new ParserCat().parse(new File(modelPath)); + return builder.build(program, mcm, EnumSet.of(TERMINATION, PROGRAM_SPEC)); + } +} diff --git a/dartagnan/src/test/resources/asm/armv8/progress64/rwlock.ll b/dartagnan/src/test/resources/asm/armv8/progress64/rwlock.ll new file mode 100644 index 0000000000..7e77429d5a --- /dev/null +++ b/dartagnan/src/test/resources/asm/armv8/progress64/rwlock.ll @@ -0,0 +1,642 @@ +; ModuleID = 'src/p64_rwlock.c' +source_filename = "src/p64_rwlock.c" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32" +target triple = "arm64-apple-macosx15.0.0" + +@x = global i32 0, align 4 +@y = global i32 0, align 4 +@__func__.p64_rwlock_acquire_rd = private unnamed_addr constant [22 x i8] c"p64_rwlock_acquire_rd\00", align 1 +@.str = private unnamed_addr constant [13 x i8] c"p64_rwlock.c\00", align 1 +@.str.1 = private unnamed_addr constant [25 x i8] c"(l & RWLOCK_WRITER) == 0\00", align 1 +@__func__.p64_rwlock_acquire_wr = private unnamed_addr constant [22 x i8] c"p64_rwlock_acquire_wr\00", align 1 +@.str.2 = private unnamed_addr constant [19 x i8] c"l == RWLOCK_WRITER\00", align 1 +@lock = global i32 0, align 4 +@__func__.main = private unnamed_addr constant [5 x i8] c"main\00", align 1 +@.str.3 = private unnamed_addr constant [31 x i8] c"x == NTHREADS && y == NTHREADS\00", align 1 +@__func__.wait_for_no = private unnamed_addr constant [12 x i8] c"wait_for_no\00", align 1 +@.str.4 = private unnamed_addr constant [16 x i8] c"(l & mask) == 0\00", align 1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @p64_rwlock_init(ptr noundef %0) #0 { + %2 = alloca ptr, align 8 + store ptr %0, ptr %2, align 8 + %3 = load ptr, ptr %2, align 8 + store i32 0, ptr %3, align 4 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @p64_rwlock_acquire_rd(ptr noundef %0) #0 { + %2 = alloca ptr, align 8 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i8, align 1 + store ptr %0, ptr %2, align 8 + br label %6 + +6: ; preds = %30, %1 + %7 = load ptr, ptr %2, align 8 + %8 = call i32 @wait_for_no(ptr noundef %7, i32 noundef -2147483648, i32 noundef 0) + store i32 %8, ptr %3, align 4 + %9 = load i32, ptr %3, align 4 + %10 = and i32 %9, -2147483648 + %11 = icmp eq i32 %10, 0 + %12 = xor i1 %11, true + %13 = zext i1 %12 to i32 + %14 = sext i32 %13 to i64 + %15 = icmp ne i64 %14, 0 + br i1 %15, label %16, label %18 + +16: ; preds = %6 + call void @__assert_rtn(ptr noundef @__func__.p64_rwlock_acquire_rd, ptr noundef @.str, i32 noundef 60, ptr noundef @.str.1) #5 + unreachable + +17: ; No predecessors! + br label %19 + +18: ; preds = %6 + br label %19 + +19: ; preds = %18, %17 + br label %20 + +20: ; preds = %19 + %21 = load ptr, ptr %2, align 8 + %22 = load i32, ptr %3, align 4 + %23 = add i32 %22, 1 + store i32 %23, ptr %4, align 4 + %24 = load i32, ptr %3, align 4 + %25 = load i32, ptr %4, align 4 + %26 = cmpxchg weak ptr %21, i32 %24, i32 %25 acquire acquire, align 4 + %27 = extractvalue { i32, i1 } %26, 0 + %28 = extractvalue { i32, i1 } %26, 1 + br i1 %28, label %30, label %29 + +29: ; preds = %20 + store i32 %27, ptr %3, align 4 + br label %30 + +30: ; preds = %29, %20 + %31 = zext i1 %28 to i8 + store i8 %31, ptr %5, align 1 + %32 = load i8, ptr %5, align 1 + %33 = trunc i8 %32 to i1 + %34 = xor i1 %33, true + br i1 %34, label %6, label %35, !llvm.loop !6 + +35: ; preds = %30 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define internal i32 @wait_for_no(ptr noundef %0, i32 noundef %1, i32 noundef %2) #0 { + %4 = alloca ptr, align 8 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + %7 = alloca i32, align 4 + %8 = alloca i32, align 4 + store ptr %0, ptr %4, align 8 + store i32 %1, ptr %5, align 4 + store i32 %2, ptr %6, align 4 + %9 = load ptr, ptr %4, align 8 + %10 = load i32, ptr %6, align 4 + switch i32 %10, label %11 [ + i32 1, label %13 + i32 2, label %13 + i32 5, label %15 + ] + +11: ; preds = %3 + %12 = load atomic i32, ptr %9 monotonic, align 4 + store i32 %12, ptr %8, align 4 + br label %17 + +13: ; preds = %3, %3 + %14 = load atomic i32, ptr %9 acquire, align 4 + store i32 %14, ptr %8, align 4 + br label %17 + +15: ; preds = %3 + %16 = load atomic i32, ptr %9 seq_cst, align 4 + store i32 %16, ptr %8, align 4 + br label %17 + +17: ; preds = %15, %13, %11 + %18 = load i32, ptr %8, align 4 + store i32 %18, ptr %7, align 4 + %19 = load i32, ptr %5, align 4 + %20 = and i32 %18, %19 + %21 = icmp ne i32 %20, 0 + br i1 %21, label %22, label %32 + +22: ; preds = %17 + br label %23 + +23: ; preds = %30, %22 + %24 = load ptr, ptr %4, align 8 + %25 = load i32, ptr %6, align 4 + %26 = call i32 @ldx32(ptr noundef %24, i32 noundef %25) + store i32 %26, ptr %7, align 4 + %27 = load i32, ptr %5, align 4 + %28 = and i32 %26, %27 + %29 = icmp ne i32 %28, 0 + br i1 %29, label %30, label %31 + +30: ; preds = %23 + call void @wfe() + br label %23, !llvm.loop !8 + +31: ; preds = %23 + br label %32 + +32: ; preds = %31, %17 + %33 = load i32, ptr %7, align 4 + %34 = load i32, ptr %5, align 4 + %35 = and i32 %33, %34 + %36 = icmp eq i32 %35, 0 + %37 = xor i1 %36, true + %38 = zext i1 %37 to i32 + %39 = sext i32 %38 to i64 + %40 = icmp ne i64 %39, 0 + br i1 %40, label %41, label %43 + +41: ; preds = %32 + call void @__assert_rtn(ptr noundef @__func__.wait_for_no, ptr noundef @.str, i32 noundef 48, ptr noundef @.str.4) #5 + unreachable + +42: ; No predecessors! + br label %44 + +43: ; preds = %32 + br label %44 + +44: ; preds = %43, %42 + %45 = load i32, ptr %7, align 4 + ret i32 %45 +} + +; Function Attrs: cold noreturn +declare void @__assert_rtn(ptr noundef, ptr noundef, i32 noundef, ptr noundef) #1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define zeroext i1 @p64_rwlock_try_acquire_rd(ptr noundef %0) #0 { + %2 = alloca i1, align 1 + %3 = alloca ptr, align 8 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + %7 = alloca i8, align 1 + store ptr %0, ptr %3, align 8 + br label %8 + +8: ; preds = %27, %1 + %9 = load ptr, ptr %3, align 8 + %10 = load atomic i32, ptr %9 monotonic, align 4 + store i32 %10, ptr %5, align 4 + %11 = load i32, ptr %5, align 4 + store i32 %11, ptr %4, align 4 + %12 = load i32, ptr %4, align 4 + %13 = and i32 %12, -2147483648 + %14 = icmp ne i32 %13, 0 + br i1 %14, label %15, label %16 + +15: ; preds = %8 + store i1 false, ptr %2, align 1 + br label %33 + +16: ; preds = %8 + br label %17 + +17: ; preds = %16 + %18 = load ptr, ptr %3, align 8 + %19 = load i32, ptr %4, align 4 + %20 = add i32 %19, 1 + store i32 %20, ptr %6, align 4 + %21 = load i32, ptr %4, align 4 + %22 = load i32, ptr %6, align 4 + %23 = cmpxchg weak ptr %18, i32 %21, i32 %22 acquire acquire, align 4 + %24 = extractvalue { i32, i1 } %23, 0 + %25 = extractvalue { i32, i1 } %23, 1 + br i1 %25, label %27, label %26 + +26: ; preds = %17 + store i32 %24, ptr %4, align 4 + br label %27 + +27: ; preds = %26, %17 + %28 = zext i1 %25 to i8 + store i8 %28, ptr %7, align 1 + %29 = load i8, ptr %7, align 1 + %30 = trunc i8 %29 to i1 + %31 = xor i1 %30, true + br i1 %31, label %8, label %32, !llvm.loop !9 + +32: ; preds = %27 + store i1 true, ptr %2, align 1 + br label %33 + +33: ; preds = %32, %15 + %34 = load i1, ptr %2, align 1 + ret i1 %34 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @p64_rwlock_release_rd(ptr noundef %0) #0 { + %2 = alloca ptr, align 8 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + store ptr %0, ptr %2, align 8 + %6 = load ptr, ptr %2, align 8 + store i32 1, ptr %4, align 4 + %7 = load i32, ptr %4, align 4 + %8 = atomicrmw sub ptr %6, i32 %7 release, align 4 + store i32 %8, ptr %5, align 4 + %9 = load i32, ptr %5, align 4 + store i32 %9, ptr %3, align 4 + %10 = load i32, ptr %3, align 4 + %11 = and i32 %10, 2147483647 + %12 = icmp eq i32 %11, 0 + %13 = xor i1 %12, true + %14 = xor i1 %13, true + %15 = zext i1 %14 to i32 + %16 = sext i32 %15 to i64 + %17 = icmp ne i64 %16, 0 + br i1 %17, label %18, label %19 + +18: ; preds = %1 + br label %19 + +19: ; preds = %18, %1 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @p64_rwlock_acquire_wr(ptr noundef %0) #0 { + %2 = alloca ptr, align 8 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i8, align 1 + store ptr %0, ptr %2, align 8 + br label %6 + +6: ; preds = %30, %1 + %7 = load ptr, ptr %2, align 8 + %8 = call i32 @wait_for_no(ptr noundef %7, i32 noundef -2147483648, i32 noundef 0) + store i32 %8, ptr %3, align 4 + %9 = load i32, ptr %3, align 4 + %10 = and i32 %9, -2147483648 + %11 = icmp eq i32 %10, 0 + %12 = xor i1 %11, true + %13 = zext i1 %12 to i32 + %14 = sext i32 %13 to i64 + %15 = icmp ne i64 %14, 0 + br i1 %15, label %16, label %18 + +16: ; preds = %6 + call void @__assert_rtn(ptr noundef @__func__.p64_rwlock_acquire_wr, ptr noundef @.str, i32 noundef 113, ptr noundef @.str.1) #5 + unreachable + +17: ; No predecessors! + br label %19 + +18: ; preds = %6 + br label %19 + +19: ; preds = %18, %17 + br label %20 + +20: ; preds = %19 + %21 = load ptr, ptr %2, align 8 + %22 = load i32, ptr %3, align 4 + %23 = or i32 %22, -2147483648 + store i32 %23, ptr %4, align 4 + %24 = load i32, ptr %3, align 4 + %25 = load i32, ptr %4, align 4 + %26 = cmpxchg weak ptr %21, i32 %24, i32 %25 acquire acquire, align 4 + %27 = extractvalue { i32, i1 } %26, 0 + %28 = extractvalue { i32, i1 } %26, 1 + br i1 %28, label %30, label %29 + +29: ; preds = %20 + store i32 %27, ptr %3, align 4 + br label %30 + +30: ; preds = %29, %20 + %31 = zext i1 %28 to i8 + store i8 %31, ptr %5, align 1 + %32 = load i8, ptr %5, align 1 + %33 = trunc i8 %32 to i1 + %34 = xor i1 %33, true + br i1 %34, label %6, label %35, !llvm.loop !10 + +35: ; preds = %30 + %36 = load ptr, ptr %2, align 8 + %37 = call i32 @wait_for_no(ptr noundef %36, i32 noundef 2147483647, i32 noundef 2) + store i32 %37, ptr %3, align 4 + %38 = load i32, ptr %3, align 4 + %39 = icmp eq i32 %38, -2147483648 + %40 = xor i1 %39, true + %41 = zext i1 %40 to i32 + %42 = sext i32 %41 to i64 + %43 = icmp ne i64 %42, 0 + br i1 %43, label %44, label %46 + +44: ; preds = %35 + call void @__assert_rtn(ptr noundef @__func__.p64_rwlock_acquire_wr, ptr noundef @.str, i32 noundef 125, ptr noundef @.str.2) #5 + unreachable + +45: ; No predecessors! + br label %47 + +46: ; preds = %35 + br label %47 + +47: ; preds = %46, %45 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define zeroext i1 @p64_rwlock_try_acquire_wr(ptr noundef %0) #0 { + %2 = alloca i1, align 1 + %3 = alloca ptr, align 8 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + %7 = alloca i8, align 1 + store ptr %0, ptr %3, align 8 + %8 = load ptr, ptr %3, align 8 + %9 = load atomic i32, ptr %8 monotonic, align 4 + store i32 %9, ptr %5, align 4 + %10 = load i32, ptr %5, align 4 + store i32 %10, ptr %4, align 4 + %11 = load i32, ptr %4, align 4 + %12 = icmp eq i32 %11, 0 + br i1 %12, label %13, label %26 + +13: ; preds = %1 + %14 = load ptr, ptr %3, align 8 + store i32 -2147483648, ptr %6, align 4 + %15 = load i32, ptr %4, align 4 + %16 = load i32, ptr %6, align 4 + %17 = cmpxchg ptr %14, i32 %15, i32 %16 acquire acquire, align 4 + %18 = extractvalue { i32, i1 } %17, 0 + %19 = extractvalue { i32, i1 } %17, 1 + br i1 %19, label %21, label %20 + +20: ; preds = %13 + store i32 %18, ptr %4, align 4 + br label %21 + +21: ; preds = %20, %13 + %22 = zext i1 %19 to i8 + store i8 %22, ptr %7, align 1 + %23 = load i8, ptr %7, align 1 + %24 = trunc i8 %23 to i1 + br i1 %24, label %25, label %26 + +25: ; preds = %21 + store i1 true, ptr %2, align 1 + br label %27 + +26: ; preds = %21, %1 + store i1 false, ptr %2, align 1 + br label %27 + +27: ; preds = %26, %25 + %28 = load i1, ptr %2, align 1 + ret i1 %28 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @p64_rwlock_release_wr(ptr noundef %0) #0 { + %2 = alloca ptr, align 8 + %3 = alloca i32, align 4 + store ptr %0, ptr %2, align 8 + %4 = load ptr, ptr %2, align 8 + %5 = load i32, ptr %4, align 4 + %6 = icmp ne i32 %5, -2147483648 + %7 = xor i1 %6, true + %8 = xor i1 %7, true + %9 = zext i1 %8 to i32 + %10 = sext i32 %9 to i64 + %11 = icmp ne i64 %10, 0 + br i1 %11, label %12, label %13 + +12: ; preds = %1 + br label %16 + +13: ; preds = %1 + %14 = load ptr, ptr %2, align 8 + store i32 0, ptr %3, align 4 + %15 = load i32, ptr %3, align 4 + store atomic i32 %15, ptr %14 release, align 4 + br label %16 + +16: ; preds = %13, %12 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define ptr @run(ptr noundef %0) #0 { + %2 = alloca ptr, align 8 + store ptr %0, ptr %2, align 8 + %3 = load ptr, ptr %2, align 8 + call void @p64_rwlock_acquire_wr(ptr noundef @lock) + %4 = load i32, ptr @x, align 4 + %5 = add nsw i32 %4, 1 + store i32 %5, ptr @x, align 4 + %6 = load i32, ptr @y, align 4 + %7 = add nsw i32 %6, 1 + store i32 %7, ptr @y, align 4 + call void @p64_rwlock_release_wr(ptr noundef @lock) + ret ptr null +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @main() #0 { + %1 = alloca i32, align 4 + %2 = alloca [3 x ptr], align 8 + %3 = alloca i32, align 4 + store i32 0, ptr %1, align 4 + call void @p64_rwlock_init(ptr noundef @lock) + store i32 0, ptr %3, align 4 + br label %4 + +4: ; preds = %18, %0 + %5 = load i32, ptr %3, align 4 + %6 = icmp slt i32 %5, 3 + br i1 %6, label %7, label %21 + +7: ; preds = %4 + %8 = load i32, ptr %3, align 4 + %9 = sext i32 %8 to i64 + %10 = getelementptr inbounds [3 x ptr], ptr %2, i64 0, i64 %9 + %11 = load i32, ptr %3, align 4 + %12 = sext i32 %11 to i64 + %13 = inttoptr i64 %12 to ptr + %14 = call i32 @pthread_create(ptr noundef %10, ptr noundef null, ptr noundef @run, ptr noundef %13) + %15 = icmp ne i32 %14, 0 + br i1 %15, label %16, label %17 + +16: ; preds = %7 + call void @exit(i32 noundef 1) #6 + unreachable + +17: ; preds = %7 + br label %18 + +18: ; preds = %17 + %19 = load i32, ptr %3, align 4 + %20 = add nsw i32 %19, 1 + store i32 %20, ptr %3, align 4 + br label %4, !llvm.loop !11 + +21: ; preds = %4 + store i32 0, ptr %3, align 4 + br label %22 + +22: ; preds = %34, %21 + %23 = load i32, ptr %3, align 4 + %24 = icmp slt i32 %23, 3 + br i1 %24, label %25, label %37 + +25: ; preds = %22 + %26 = load i32, ptr %3, align 4 + %27 = sext i32 %26 to i64 + %28 = getelementptr inbounds [3 x ptr], ptr %2, i64 0, i64 %27 + %29 = load ptr, ptr %28, align 8 + %30 = call i32 @"\01_pthread_join"(ptr noundef %29, ptr noundef null) + %31 = icmp ne i32 %30, 0 + br i1 %31, label %32, label %33 + +32: ; preds = %25 + call void @exit(i32 noundef 1) #6 + unreachable + +33: ; preds = %25 + br label %34 + +34: ; preds = %33 + %35 = load i32, ptr %3, align 4 + %36 = add nsw i32 %35, 1 + store i32 %36, ptr %3, align 4 + br label %22, !llvm.loop !12 + +37: ; preds = %22 + %38 = load i32, ptr @x, align 4 + %39 = icmp eq i32 %38, 3 + br i1 %39, label %40, label %43 + +40: ; preds = %37 + %41 = load i32, ptr @y, align 4 + %42 = icmp eq i32 %41, 3 + br label %43 + +43: ; preds = %40, %37 + %44 = phi i1 [ false, %37 ], [ %42, %40 ] + %45 = xor i1 %44, true + %46 = zext i1 %45 to i32 + %47 = sext i32 %46 to i64 + %48 = icmp ne i64 %47, 0 + br i1 %48, label %49, label %51 + +49: ; preds = %43 + call void @__assert_rtn(ptr noundef @__func__.main, ptr noundef @.str, i32 noundef 194, ptr noundef @.str.3) #5 + unreachable + +50: ; No predecessors! + br label %52 + +51: ; preds = %43 + br label %52 + +52: ; preds = %51, %50 + ret i32 0 +} + +declare i32 @pthread_create(ptr noundef, ptr noundef, ptr noundef, ptr noundef) #2 + +; Function Attrs: noreturn +declare void @exit(i32 noundef) #3 + +declare i32 @"\01_pthread_join"(ptr noundef, ptr noundef) #2 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define internal i32 @ldx32(ptr noundef %0, i32 noundef %1) #0 { + %3 = alloca ptr, align 8 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + store ptr %0, ptr %3, align 8 + store i32 %1, ptr %4, align 4 + %6 = load i32, ptr %4, align 4 + %7 = icmp eq i32 %6, 2 + br i1 %7, label %8, label %11 + +8: ; preds = %2 + %9 = load ptr, ptr %3, align 8 + %10 = call i32 asm sideeffect "ldaxr ${0:w}, [$1]", "=&r,r,~{memory}"(ptr %9) #7, !srcloc !13 + store i32 %10, ptr %5, align 4 + br label %19 + +11: ; preds = %2 + %12 = load i32, ptr %4, align 4 + %13 = icmp eq i32 %12, 0 + br i1 %13, label %14, label %17 + +14: ; preds = %11 + %15 = load ptr, ptr %3, align 8 + %16 = call i32 asm sideeffect "ldxr ${0:w}, [$1]", "=&r,r,~{memory}"(ptr %15) #7, !srcloc !14 + store i32 %16, ptr %5, align 4 + br label %18 + +17: ; preds = %11 + call void @abort() #8 + unreachable + +18: ; preds = %14 + br label %19 + +19: ; preds = %18, %8 + %20 = load i32, ptr %5, align 4 + ret i32 %20 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define internal void @wfe() #0 { + call void asm sideeffect "wfe", "~{memory}"() #7, !srcloc !15 + ret void +} + +; Function Attrs: cold noreturn nounwind +declare void @abort() #4 + +attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" } +attributes #1 = { cold noreturn "disable-tail-calls"="true" "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" } +attributes #2 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" } +attributes #3 = { noreturn "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" } +attributes #4 = { cold noreturn nounwind "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" } +attributes #5 = { cold noreturn } +attributes #6 = { noreturn } +attributes #7 = { nounwind } +attributes #8 = { cold noreturn nounwind } + +!llvm.module.flags = !{!0, !1, !2, !3, !4} +!llvm.ident = !{!5} + +!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 15, i32 0]} +!1 = !{i32 1, !"wchar_size", i32 4} +!2 = !{i32 8, !"PIC Level", i32 2} +!3 = !{i32 7, !"uwtable", i32 1} +!4 = !{i32 7, !"frame-pointer", i32 1} +!5 = !{!"Homebrew clang version 19.1.7"} +!6 = distinct !{!6, !7} +!7 = !{!"llvm.loop.mustprogress"} +!8 = distinct !{!8, !7} +!9 = distinct !{!9, !7} +!10 = distinct !{!10, !7} +!11 = distinct !{!11, !7} +!12 = distinct !{!12, !7} +!13 = !{i64 1114382} +!14 = !{i64 1114552} +!15 = !{i64 1085651} diff --git a/dartagnan/src/test/resources/asm/armv8/progress64/seqlock.ll b/dartagnan/src/test/resources/asm/armv8/progress64/seqlock.ll new file mode 100644 index 0000000000..e3176fbe49 --- /dev/null +++ b/dartagnan/src/test/resources/asm/armv8/progress64/seqlock.ll @@ -0,0 +1,711 @@ +; ModuleID = 'src/p64_rwsync.c' +source_filename = "src/p64_rwsync.c" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32" +target triple = "arm64-apple-macosx15.0.0" + +%struct.shared_data_t = type { i32, i32 } + +@sync = global i32 0, align 4 +@data = global %struct.shared_data_t zeroinitializer, align 4 +@.str = private unnamed_addr constant [24 x i8] c"Reader: x = %d, y = %d\0A\00", align 1 +@__func__.main = private unnamed_addr constant [5 x i8] c"main\00", align 1 +@.str.1 = private unnamed_addr constant [13 x i8] c"p64_rwsync.c\00", align 1 +@.str.2 = private unnamed_addr constant [27 x i8] c"data.x == 2 && data.y == 2\00", align 1 +@.str.3 = private unnamed_addr constant [30 x i8] c"Final values: x = %d, y = %d\0A\00", align 1 +@__func__.wait_for_no_writer = private unnamed_addr constant [19 x i8] c"wait_for_no_writer\00", align 1 +@.str.4 = private unnamed_addr constant [25 x i8] c"(l & RWSYNC_WRITER) == 0\00", align 1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @p64_rwsync_init(ptr noundef %0) #0 { + %2 = alloca ptr, align 8 + store ptr %0, ptr %2, align 8 + %3 = load ptr, ptr %2, align 8 + store i32 0, ptr %3, align 4 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @p64_rwsync_acquire_rd(ptr noundef %0) #0 { + %2 = alloca ptr, align 8 + store ptr %0, ptr %2, align 8 + %3 = load ptr, ptr %2, align 8 + %4 = call i32 @wait_for_no_writer(ptr noundef %3, i32 noundef 2) + ret i32 %4 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define internal i32 @wait_for_no_writer(ptr noundef %0, i32 noundef %1) #0 { + %3 = alloca ptr, align 8 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store ptr %0, ptr %3, align 8 + store i32 %1, ptr %4, align 4 + %7 = load ptr, ptr %3, align 8 + %8 = load i32, ptr %4, align 4 + switch i32 %8, label %9 [ + i32 1, label %11 + i32 2, label %11 + i32 5, label %13 + ] + +9: ; preds = %2 + %10 = load atomic i32, ptr %7 monotonic, align 4 + store i32 %10, ptr %6, align 4 + br label %15 + +11: ; preds = %2, %2 + %12 = load atomic i32, ptr %7 acquire, align 4 + store i32 %12, ptr %6, align 4 + br label %15 + +13: ; preds = %2 + %14 = load atomic i32, ptr %7 seq_cst, align 4 + store i32 %14, ptr %6, align 4 + br label %15 + +15: ; preds = %13, %11, %9 + %16 = load i32, ptr %6, align 4 + store i32 %16, ptr %5, align 4 + %17 = and i32 %16, 1 + %18 = icmp ne i32 %17, 0 + %19 = xor i1 %18, true + %20 = xor i1 %19, true + %21 = zext i1 %20 to i32 + %22 = sext i32 %21 to i64 + %23 = icmp ne i64 %22, 0 + br i1 %23, label %24, label %33 + +24: ; preds = %15 + br label %25 + +25: ; preds = %31, %24 + %26 = load ptr, ptr %3, align 8 + %27 = load i32, ptr %4, align 4 + %28 = call i32 @ldx32(ptr noundef %26, i32 noundef %27) + store i32 %28, ptr %5, align 4 + %29 = and i32 %28, 1 + %30 = icmp ne i32 %29, 0 + br i1 %30, label %31, label %32 + +31: ; preds = %25 + call void @wfe() + br label %25, !llvm.loop !6 + +32: ; preds = %25 + br label %33 + +33: ; preds = %32, %15 + %34 = load i32, ptr %5, align 4 + %35 = and i32 %34, 1 + %36 = icmp eq i32 %35, 0 + %37 = xor i1 %36, true + %38 = zext i1 %37 to i32 + %39 = sext i32 %38 to i64 + %40 = icmp ne i64 %39, 0 + br i1 %40, label %41, label %43 + +41: ; preds = %33 + call void @__assert_rtn(ptr noundef @__func__.wait_for_no_writer, ptr noundef @.str.1, i32 noundef 48, ptr noundef @.str.4) #5 + unreachable + +42: ; No predecessors! + br label %44 + +43: ; preds = %33 + br label %44 + +44: ; preds = %43, %42 + %45 = load i32, ptr %5, align 4 + ret i32 %45 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define zeroext i1 @p64_rwsync_release_rd(ptr noundef %0, i32 noundef %1) #0 { + %3 = alloca ptr, align 8 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + store ptr %0, ptr %3, align 8 + store i32 %1, ptr %4, align 4 + fence acquire + %6 = load ptr, ptr %3, align 8 + %7 = load atomic i32, ptr %6 monotonic, align 4 + store i32 %7, ptr %5, align 4 + %8 = load i32, ptr %5, align 4 + %9 = load i32, ptr %4, align 4 + %10 = icmp eq i32 %8, %9 + ret i1 %10 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @p64_rwsync_acquire_wr(ptr noundef %0) #0 { + %2 = alloca ptr, align 8 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i8, align 1 + store ptr %0, ptr %2, align 8 + br label %6 + +6: ; preds = %19, %1 + %7 = load ptr, ptr %2, align 8 + %8 = call i32 @wait_for_no_writer(ptr noundef %7, i32 noundef 0) + store i32 %8, ptr %3, align 4 + br label %9 + +9: ; preds = %6 + %10 = load ptr, ptr %2, align 8 + %11 = load i32, ptr %3, align 4 + %12 = add i32 %11, 1 + store i32 %12, ptr %4, align 4 + %13 = load i32, ptr %3, align 4 + %14 = load i32, ptr %4, align 4 + %15 = cmpxchg weak ptr %10, i32 %13, i32 %14 acquire monotonic, align 4 + %16 = extractvalue { i32, i1 } %15, 0 + %17 = extractvalue { i32, i1 } %15, 1 + br i1 %17, label %19, label %18 + +18: ; preds = %9 + store i32 %16, ptr %3, align 4 + br label %19 + +19: ; preds = %18, %9 + %20 = zext i1 %17 to i8 + store i8 %20, ptr %5, align 1 + %21 = load i8, ptr %5, align 1 + %22 = trunc i8 %21 to i1 + %23 = xor i1 %22, true + br i1 %23, label %6, label %24, !llvm.loop !8 + +24: ; preds = %19 + fence release + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @p64_rwsync_release_wr(ptr noundef %0) #0 { + %2 = alloca ptr, align 8 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + store ptr %0, ptr %2, align 8 + %5 = load ptr, ptr %2, align 8 + %6 = load i32, ptr %5, align 4 + store i32 %6, ptr %3, align 4 + %7 = load i32, ptr %3, align 4 + %8 = and i32 %7, 1 + %9 = icmp ne i32 %8, 0 + %10 = xor i1 %9, true + %11 = xor i1 %10, true + %12 = zext i1 %11 to i32 + %13 = sext i32 %12 to i64 + %14 = icmp eq i64 %13, 0 + br i1 %14, label %15, label %16 + +15: ; preds = %1 + br label %21 + +16: ; preds = %1 + %17 = load ptr, ptr %2, align 8 + %18 = load i32, ptr %3, align 4 + %19 = add i32 %18, 1 + store i32 %19, ptr %4, align 4 + %20 = load i32, ptr %4, align 4 + store atomic i32 %20, ptr %17 release, align 4 + br label %21 + +21: ; preds = %16, %15 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @p64_rwsync_read(ptr noundef %0, ptr noundef %1, ptr noundef %2, i64 noundef %3) #0 { + %5 = alloca ptr, align 8 + %6 = alloca ptr, align 8 + %7 = alloca ptr, align 8 + %8 = alloca i64, align 8 + %9 = alloca i32, align 4 + store ptr %0, ptr %5, align 8 + store ptr %1, ptr %6, align 8 + store ptr %2, ptr %7, align 8 + store i64 %3, ptr %8, align 8 + br label %10 + +10: ; preds = %16, %4 + %11 = load ptr, ptr %5, align 8 + %12 = call i32 @p64_rwsync_acquire_rd(ptr noundef %11) + store i32 %12, ptr %9, align 4 + %13 = load ptr, ptr %6, align 8 + %14 = load ptr, ptr %7, align 8 + %15 = load i64, ptr %8, align 8 + call void @atomic_memcpy(ptr noundef %13, ptr noundef %14, i64 noundef %15) + br label %16 + +16: ; preds = %10 + %17 = load ptr, ptr %5, align 8 + %18 = load i32, ptr %9, align 4 + %19 = call zeroext i1 @p64_rwsync_release_rd(ptr noundef %17, i32 noundef %18) + %20 = xor i1 %19, true + br i1 %20, label %10, label %21, !llvm.loop !9 + +21: ; preds = %16 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define internal void @atomic_memcpy(ptr noundef %0, ptr noundef %1, i64 noundef %2) #0 { + %4 = alloca ptr, align 8 + %5 = alloca ptr, align 8 + %6 = alloca i64, align 8 + %7 = alloca i64, align 8 + %8 = alloca i64, align 8 + %9 = alloca i64, align 8 + %10 = alloca i64, align 8 + %11 = alloca i32, align 4 + %12 = alloca i32, align 4 + %13 = alloca i32, align 4 + %14 = alloca i64, align 8 + %15 = alloca i16, align 2 + %16 = alloca i16, align 2 + %17 = alloca i16, align 2 + %18 = alloca i64, align 8 + %19 = alloca i8, align 1 + %20 = alloca i8, align 1 + %21 = alloca i8, align 1 + %22 = alloca i64, align 8 + store ptr %0, ptr %4, align 8 + store ptr %1, ptr %5, align 8 + store i64 %2, ptr %6, align 8 + br label %23 + +23: ; preds = %26, %3 + %24 = load i64, ptr %6, align 8 + %25 = icmp uge i64 %24, 8 + br i1 %25, label %26, label %40 + +26: ; preds = %23 + %27 = load ptr, ptr %5, align 8 + %28 = load atomic i64, ptr %27 monotonic, align 8 + store i64 %28, ptr %8, align 8 + %29 = load i64, ptr %8, align 8 + store i64 %29, ptr %7, align 8 + %30 = load ptr, ptr %5, align 8 + %31 = getelementptr inbounds i8, ptr %30, i64 8 + store ptr %31, ptr %5, align 8 + %32 = load ptr, ptr %4, align 8 + %33 = load i64, ptr %7, align 8 + store i64 %33, ptr %9, align 8 + %34 = load i64, ptr %9, align 8 + store atomic i64 %34, ptr %32 monotonic, align 8 + %35 = load ptr, ptr %4, align 8 + %36 = getelementptr inbounds i8, ptr %35, i64 8 + store ptr %36, ptr %4, align 8 + %37 = load i64, ptr %6, align 8 + %38 = sub i64 %37, 8 + store i64 %38, ptr %6, align 8 + store i64 %38, ptr %10, align 8 + %39 = load i64, ptr %10, align 8 + br label %23, !llvm.loop !10 + +40: ; preds = %23 + %41 = load i64, ptr %6, align 8 + %42 = icmp uge i64 %41, 4 + br i1 %42, label %43, label %57 + +43: ; preds = %40 + %44 = load ptr, ptr %5, align 8 + %45 = load atomic i32, ptr %44 monotonic, align 4 + store i32 %45, ptr %12, align 4 + %46 = load i32, ptr %12, align 4 + store i32 %46, ptr %11, align 4 + %47 = load ptr, ptr %5, align 8 + %48 = getelementptr inbounds i8, ptr %47, i64 4 + store ptr %48, ptr %5, align 8 + %49 = load ptr, ptr %4, align 8 + %50 = load i32, ptr %11, align 4 + store i32 %50, ptr %13, align 4 + %51 = load i32, ptr %13, align 4 + store atomic i32 %51, ptr %49 monotonic, align 4 + %52 = load ptr, ptr %4, align 8 + %53 = getelementptr inbounds i8, ptr %52, i64 4 + store ptr %53, ptr %4, align 8 + %54 = load i64, ptr %6, align 8 + %55 = sub i64 %54, 4 + store i64 %55, ptr %6, align 8 + store i64 %55, ptr %14, align 8 + %56 = load i64, ptr %14, align 8 + br label %57 + +57: ; preds = %43, %40 + %58 = load i64, ptr %6, align 8 + %59 = icmp uge i64 %58, 2 + br i1 %59, label %60, label %74 + +60: ; preds = %57 + %61 = load ptr, ptr %5, align 8 + %62 = load atomic i16, ptr %61 monotonic, align 2 + store i16 %62, ptr %16, align 2 + %63 = load i16, ptr %16, align 2 + store i16 %63, ptr %15, align 2 + %64 = load ptr, ptr %5, align 8 + %65 = getelementptr inbounds i8, ptr %64, i64 2 + store ptr %65, ptr %5, align 8 + %66 = load ptr, ptr %4, align 8 + %67 = load i16, ptr %15, align 2 + store i16 %67, ptr %17, align 2 + %68 = load i16, ptr %17, align 2 + store atomic i16 %68, ptr %66 monotonic, align 2 + %69 = load ptr, ptr %4, align 8 + %70 = getelementptr inbounds i8, ptr %69, i64 2 + store ptr %70, ptr %4, align 8 + %71 = load i64, ptr %6, align 8 + %72 = sub i64 %71, 2 + store i64 %72, ptr %6, align 8 + store i64 %72, ptr %18, align 8 + %73 = load i64, ptr %18, align 8 + br label %74 + +74: ; preds = %60, %57 + %75 = load i64, ptr %6, align 8 + %76 = icmp uge i64 %75, 1 + br i1 %76, label %77, label %91 + +77: ; preds = %74 + %78 = load ptr, ptr %5, align 8 + %79 = load atomic i8, ptr %78 monotonic, align 1 + store i8 %79, ptr %20, align 1 + %80 = load i8, ptr %20, align 1 + store i8 %80, ptr %19, align 1 + %81 = load ptr, ptr %5, align 8 + %82 = getelementptr inbounds i8, ptr %81, i64 1 + store ptr %82, ptr %5, align 8 + %83 = load ptr, ptr %4, align 8 + %84 = load i8, ptr %19, align 1 + store i8 %84, ptr %21, align 1 + %85 = load i8, ptr %21, align 1 + store atomic i8 %85, ptr %83 monotonic, align 1 + %86 = load ptr, ptr %4, align 8 + %87 = getelementptr inbounds i8, ptr %86, i64 1 + store ptr %87, ptr %4, align 8 + %88 = load i64, ptr %6, align 8 + %89 = sub i64 %88, 1 + store i64 %89, ptr %6, align 8 + store i64 %89, ptr %22, align 8 + %90 = load i64, ptr %22, align 8 + br label %91 + +91: ; preds = %77, %74 + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @p64_rwsync_write(ptr noundef %0, ptr noundef %1, ptr noundef %2, i64 noundef %3) #0 { + %5 = alloca ptr, align 8 + %6 = alloca ptr, align 8 + %7 = alloca ptr, align 8 + %8 = alloca i64, align 8 + store ptr %0, ptr %5, align 8 + store ptr %1, ptr %6, align 8 + store ptr %2, ptr %7, align 8 + store i64 %3, ptr %8, align 8 + %9 = load ptr, ptr %5, align 8 + call void @p64_rwsync_acquire_wr(ptr noundef %9) + %10 = load ptr, ptr %7, align 8 + %11 = load ptr, ptr %6, align 8 + %12 = load i64, ptr %8, align 8 + call void @atomic_memcpy(ptr noundef %10, ptr noundef %11, i64 noundef %12) + %13 = load ptr, ptr %5, align 8 + call void @p64_rwsync_release_wr(ptr noundef %13) + ret void +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define ptr @writer(ptr noundef %0) #0 { + %2 = alloca ptr, align 8 + store ptr %0, ptr %2, align 8 + %3 = load ptr, ptr %2, align 8 + call void @p64_rwsync_acquire_wr(ptr noundef @sync) + %4 = load i32, ptr @data, align 4 + %5 = add nsw i32 %4, 1 + store i32 %5, ptr @data, align 4 + %6 = load i32, ptr getelementptr inbounds (%struct.shared_data_t, ptr @data, i32 0, i32 1), align 4 + %7 = add nsw i32 %6, 1 + store i32 %7, ptr getelementptr inbounds (%struct.shared_data_t, ptr @data, i32 0, i32 1), align 4 + call void @p64_rwsync_release_wr(ptr noundef @sync) + ret ptr null +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define ptr @reader(ptr noundef %0) #0 { + %2 = alloca ptr, align 8 + %3 = alloca %struct.shared_data_t, align 4 + store ptr %0, ptr %2, align 8 + %4 = load ptr, ptr %2, align 8 + call void @p64_rwsync_read(ptr noundef @sync, ptr noundef %3, ptr noundef @data, i64 noundef 8) + %5 = getelementptr inbounds %struct.shared_data_t, ptr %3, i32 0, i32 0 + %6 = load i32, ptr %5, align 4 + %7 = getelementptr inbounds %struct.shared_data_t, ptr %3, i32 0, i32 1 + %8 = load i32, ptr %7, align 4 + %9 = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %6, i32 noundef %8) + ret ptr null +} + +declare i32 @printf(ptr noundef, ...) #1 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define i32 @main() #0 { + %1 = alloca i32, align 4 + %2 = alloca [2 x ptr], align 8 + %3 = alloca [2 x ptr], align 8 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + %7 = alloca i32, align 4 + store i32 0, ptr %1, align 4 + call void @p64_rwsync_init(ptr noundef @sync) + store i32 0, ptr @data, align 4 + store i32 0, ptr getelementptr inbounds (%struct.shared_data_t, ptr @data, i32 0, i32 1), align 4 + store i32 0, ptr %4, align 4 + br label %8 + +8: ; preds = %19, %0 + %9 = load i32, ptr %4, align 4 + %10 = icmp slt i32 %9, 2 + br i1 %10, label %11, label %22 + +11: ; preds = %8 + %12 = load i32, ptr %4, align 4 + %13 = sext i32 %12 to i64 + %14 = getelementptr inbounds [2 x ptr], ptr %2, i64 0, i64 %13 + %15 = call i32 @pthread_create(ptr noundef %14, ptr noundef null, ptr noundef @writer, ptr noundef null) + %16 = icmp ne i32 %15, 0 + br i1 %16, label %17, label %18 + +17: ; preds = %11 + call void @exit(i32 noundef 1) #6 + unreachable + +18: ; preds = %11 + br label %19 + +19: ; preds = %18 + %20 = load i32, ptr %4, align 4 + %21 = add nsw i32 %20, 1 + store i32 %21, ptr %4, align 4 + br label %8, !llvm.loop !11 + +22: ; preds = %8 + store i32 0, ptr %5, align 4 + br label %23 + +23: ; preds = %34, %22 + %24 = load i32, ptr %5, align 4 + %25 = icmp slt i32 %24, 2 + br i1 %25, label %26, label %37 + +26: ; preds = %23 + %27 = load i32, ptr %5, align 4 + %28 = sext i32 %27 to i64 + %29 = getelementptr inbounds [2 x ptr], ptr %3, i64 0, i64 %28 + %30 = call i32 @pthread_create(ptr noundef %29, ptr noundef null, ptr noundef @reader, ptr noundef null) + %31 = icmp ne i32 %30, 0 + br i1 %31, label %32, label %33 + +32: ; preds = %26 + call void @exit(i32 noundef 1) #6 + unreachable + +33: ; preds = %26 + br label %34 + +34: ; preds = %33 + %35 = load i32, ptr %5, align 4 + %36 = add nsw i32 %35, 1 + store i32 %36, ptr %5, align 4 + br label %23, !llvm.loop !12 + +37: ; preds = %23 + store i32 0, ptr %6, align 4 + br label %38 + +38: ; preds = %50, %37 + %39 = load i32, ptr %6, align 4 + %40 = icmp slt i32 %39, 2 + br i1 %40, label %41, label %53 + +41: ; preds = %38 + %42 = load i32, ptr %6, align 4 + %43 = sext i32 %42 to i64 + %44 = getelementptr inbounds [2 x ptr], ptr %2, i64 0, i64 %43 + %45 = load ptr, ptr %44, align 8 + %46 = call i32 @"\01_pthread_join"(ptr noundef %45, ptr noundef null) + %47 = icmp ne i32 %46, 0 + br i1 %47, label %48, label %49 + +48: ; preds = %41 + call void @exit(i32 noundef 1) #6 + unreachable + +49: ; preds = %41 + br label %50 + +50: ; preds = %49 + %51 = load i32, ptr %6, align 4 + %52 = add nsw i32 %51, 1 + store i32 %52, ptr %6, align 4 + br label %38, !llvm.loop !13 + +53: ; preds = %38 + store i32 0, ptr %7, align 4 + br label %54 + +54: ; preds = %66, %53 + %55 = load i32, ptr %7, align 4 + %56 = icmp slt i32 %55, 2 + br i1 %56, label %57, label %69 + +57: ; preds = %54 + %58 = load i32, ptr %7, align 4 + %59 = sext i32 %58 to i64 + %60 = getelementptr inbounds [2 x ptr], ptr %3, i64 0, i64 %59 + %61 = load ptr, ptr %60, align 8 + %62 = call i32 @"\01_pthread_join"(ptr noundef %61, ptr noundef null) + %63 = icmp ne i32 %62, 0 + br i1 %63, label %64, label %65 + +64: ; preds = %57 + call void @exit(i32 noundef 1) #6 + unreachable + +65: ; preds = %57 + br label %66 + +66: ; preds = %65 + %67 = load i32, ptr %7, align 4 + %68 = add nsw i32 %67, 1 + store i32 %68, ptr %7, align 4 + br label %54, !llvm.loop !14 + +69: ; preds = %54 + %70 = load i32, ptr @data, align 4 + %71 = icmp eq i32 %70, 2 + br i1 %71, label %72, label %75 + +72: ; preds = %69 + %73 = load i32, ptr getelementptr inbounds (%struct.shared_data_t, ptr @data, i32 0, i32 1), align 4 + %74 = icmp eq i32 %73, 2 + br label %75 + +75: ; preds = %72, %69 + %76 = phi i1 [ false, %69 ], [ %74, %72 ] + %77 = xor i1 %76, true + %78 = zext i1 %77 to i32 + %79 = sext i32 %78 to i64 + %80 = icmp ne i64 %79, 0 + br i1 %80, label %81, label %83 + +81: ; preds = %75 + call void @__assert_rtn(ptr noundef @__func__.main, ptr noundef @.str.1, i32 noundef 227, ptr noundef @.str.2) #5 + unreachable + +82: ; No predecessors! + br label %84 + +83: ; preds = %75 + br label %84 + +84: ; preds = %83, %82 + %85 = load i32, ptr @data, align 4 + %86 = load i32, ptr getelementptr inbounds (%struct.shared_data_t, ptr @data, i32 0, i32 1), align 4 + %87 = call i32 (ptr, ...) @printf(ptr noundef @.str.3, i32 noundef %85, i32 noundef %86) + ret i32 0 +} + +declare i32 @pthread_create(ptr noundef, ptr noundef, ptr noundef, ptr noundef) #1 + +; Function Attrs: noreturn +declare void @exit(i32 noundef) #2 + +declare i32 @"\01_pthread_join"(ptr noundef, ptr noundef) #1 + +; Function Attrs: cold noreturn +declare void @__assert_rtn(ptr noundef, ptr noundef, i32 noundef, ptr noundef) #3 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define internal i32 @ldx32(ptr noundef %0, i32 noundef %1) #0 { + %3 = alloca ptr, align 8 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + store ptr %0, ptr %3, align 8 + store i32 %1, ptr %4, align 4 + %6 = load i32, ptr %4, align 4 + %7 = icmp eq i32 %6, 2 + br i1 %7, label %8, label %11 + +8: ; preds = %2 + %9 = load ptr, ptr %3, align 8 + %10 = call i32 asm sideeffect "ldaxr ${0:w}, [$1]", "=&r,r,~{memory}"(ptr %9) #7, !srcloc !15 + store i32 %10, ptr %5, align 4 + br label %19 + +11: ; preds = %2 + %12 = load i32, ptr %4, align 4 + %13 = icmp eq i32 %12, 0 + br i1 %13, label %14, label %17 + +14: ; preds = %11 + %15 = load ptr, ptr %3, align 8 + %16 = call i32 asm sideeffect "ldxr ${0:w}, [$1]", "=&r,r,~{memory}"(ptr %15) #7, !srcloc !16 + store i32 %16, ptr %5, align 4 + br label %18 + +17: ; preds = %11 + call void @abort() #8 + unreachable + +18: ; preds = %14 + br label %19 + +19: ; preds = %18, %8 + %20 = load i32, ptr %5, align 4 + ret i32 %20 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define internal void @wfe() #0 { + call void asm sideeffect "wfe", "~{memory}"() #7, !srcloc !17 + ret void +} + +; Function Attrs: cold noreturn nounwind +declare void @abort() #4 + +attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" } +attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" } +attributes #2 = { noreturn "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" } +attributes #3 = { cold noreturn "disable-tail-calls"="true" "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" } +attributes #4 = { cold noreturn nounwind "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" } +attributes #5 = { cold noreturn } +attributes #6 = { noreturn } +attributes #7 = { nounwind } +attributes #8 = { cold noreturn nounwind } + +!llvm.module.flags = !{!0, !1, !2, !3, !4} +!llvm.ident = !{!5} + +!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 15, i32 0]} +!1 = !{i32 1, !"wchar_size", i32 4} +!2 = !{i32 8, !"PIC Level", i32 2} +!3 = !{i32 7, !"uwtable", i32 1} +!4 = !{i32 7, !"frame-pointer", i32 1} +!5 = !{!"Homebrew clang version 19.1.7"} +!6 = distinct !{!6, !7} +!7 = !{!"llvm.loop.mustprogress"} +!8 = distinct !{!8, !7} +!9 = distinct !{!9, !7} +!10 = distinct !{!10, !7} +!11 = distinct !{!11, !7} +!12 = distinct !{!12, !7} +!13 = distinct !{!13, !7} +!14 = distinct !{!14, !7} +!15 = !{i64 1208267} +!16 = !{i64 1208437} +!17 = !{i64 973250}