diff --git a/core/src/core.zig b/core/src/core.zig
index 232f0f70f..ada4702d6 100644
--- a/core/src/core.zig
+++ b/core/src/core.zig
@@ -1,5 +1,6 @@
 pub const experimental = @import("core/experimental.zig");
 pub const heap = @import("core/heap.zig");
+pub const Io = @import("core/Io.zig");
 /// USB data types and helper functions
 pub const usb = @import("core/usb.zig");
 
diff --git a/core/src/core/Io.zig b/core/src/core/Io.zig
new file mode 100644
index 000000000..d74053b7e
--- /dev/null
+++ b/core/src/core/Io.zig
@@ -0,0 +1,331 @@
+const builtin = @import("builtin");
+const std = @import("std");
+const drivers = @import("drivers");
+const assert = std.debug.assert;
+const time = drivers.time;
+
+/// Unsigned integer with the same alignment as the stack.
+const StackUint = usize;
+
+/// Direction in which the stack grows. The `.up` direction is completely untested.
+const stack_growth_direction: enum { up, down } = .down;
+
+/// Number of `StackUint`-sized slots needed to store `size` bytes
+/// on the stack, i.e. `size / @sizeOf(StackUint)` rounded up.
+fn to_stack_units(size: usize) usize {
+    const unit = @sizeOf(StackUint);
+    // Ceiling division of unsigned values by a non-zero constant: the zero
+    // case is handled first so that `size - 1` cannot wrap around.
+    if (size == 0) return 0;
+    return (size - 1) / unit + 1;
+}
+
+/// Information about when a task should be resumed.
+/// This could be made into an interface.
+/// Upside: Much easier to move more functionality into core.
+/// Downside: Performance (may be mitigated once https://github.com/ziglang/zig/issues/23367 is implemented).
+pub const PauseReason = union(enum) {
+    const OnStack = [to_stack_units(@sizeOf(@This()))]StackUint; // storage for one PauseReason, in stack units
+    const PtrMask = struct { ptr: *const usize, mask: usize }; // word to poll and the bits of interest in it
+
+    /// Task voluntarily gave up execution, but is ready to continue.
+    yield,
+    sleep_until: time.Absolute align(@alignOf(StackUint)), // resume once the clock has reached this time
+    bits_mask_all_low: PtrMask, // resume once all masked bits read as 0
+    bits_mask_any_high: PtrMask, // resume once any masked bit reads as 1
+    /// This value means there is no context stored on this stack
+    /// so it can be used to launch a new task.
+    no_task,
+
+    comptime {
+        assert(@alignOf(@This()) <= @alignOf(StackUint));
+    }
+
+    /// Check if the task should be resumed.
+    /// The io interface may not be necessary.
+    pub fn can_resume(this: *const @This(), io: anytype) bool {
+        return switch (this.*) {
+            .no_task => false,
+            .yield => true,
+            .bits_mask_any_high => |info| {
+                return @atomicLoad(usize, info.ptr, .acquire) & info.mask != 0;
+            },
+            .bits_mask_all_low => |info| {
+                return @atomicLoad(usize, info.ptr, .acquire) & info.mask == 0;
+            },
+            .sleep_until => |t| t.is_reached_by(io.monotonic_clock()),
+        };
+    }
+
+    /// Returns the context of this task (null for `.no_task`), assuming that the
+    /// pause reason is stored just beyond the end of the stack, next to the context.
+    pub fn context(this: *@This()) ?*Context {
+        if (this.* == .no_task) return null;
+
+        const on_stack: *OnStack = @ptrCast(this);
+        const both: *ContextAndReason = @fieldParentPtr("reason", on_stack);
+        return &both.context;
+    }
+};
+
+/// All the state preserved between function calls.
+/// This assumes cooperative multitasking. Preemption would need to save more data,
+/// but may not be needed thanks to interrupts (see https://github.com/rtic-rs/rtic).
+/// The order of registers is arbitrary, this order makes the assembly more compact.
+pub const ContextArm = extern struct {
+    r8: u32,
+    r9: u32,
+    r10: u32,
+    r11: u32,
+    r12: u32,
+    r4: u32,
+    r5: u32,
+    r6: u32,
+    r7: u32,
+    pc: u32,
+
+    comptime {
+        assert(@alignOf(@This()) <= @alignOf(StackUint));
+    }
+
+    /// Return value of context switch. After exiting save_and_switch_raw the pause
+    /// reason needs to be saved to the stack (this was troublesome to do in assembly).
+    const SaveResult = packed struct {
+        previous_reason: *PauseReason, // destination: where the previous task's reason is to be written
+        previous_pause_reason: *const PauseReason, // source: the reason value to copy there
+    };
+
+    /// Switches context to `switch_to` and then stores `reason` just beyond the stack.
+    /// The address at which `reason` was stored is saved to `save_to`.
+    /// By doing it this way we can store all information that a scheduler needs on top of the task stack.
+    /// This may be made generic so that other schedulers can save any data.
+    pub fn save_and_switch(save_to: **PauseReason, switch_to: *@This(), reason: *const PauseReason) void {
+        const raw: *const fn (
+            **PauseReason,
+            *const PauseReason,
+            *@This(),
+        ) callconv(.c) SaveResult =
+            @ptrCast(&save_and_switch_raw);
+
+        const ret = raw(save_to, reason, switch_to);
+        ret.previous_reason.* = ret.previous_pause_reason.*;
+    }
+
+    /// Prepares the saved context registers to launch a new task.
+    /// In this implementation, the same mechanism as in a context switch is used.
+    /// The context switch assembly 'just so happens' to temporarily store some of the
+    /// context in registers used for argument passing (arguments 3 and 4).
+    /// Arguments 1 and 2 correspond to the return value, so they contain
+    /// what would be the return value of `save_and_switch`.
+    pub fn init_launch(this: *@This(), F: type, func: *const F, args: *const std.meta.ArgsTuple(F)) void {
+        const LaunchTask = struct {
+            // ABI arguments 1 and 2 are used for the two structure fields.
+            fn launch_task(ret: SaveResult, f: *const F, a: @TypeOf(args)) callconv(.c) void {
+                ret.previous_reason.* = ret.previous_pause_reason.*;
+
+                _ = @call(.auto, f, a.*);
+
+                // TODO: futures and return values
+                std.debug.panic("task returned", .{});
+            }
+        };
+
+        this.pc = @intFromPtr(&LaunchTask.launch_task);
+        // r8 corresponds to argument 3, r9 to arg 4.
+        this.r8 = @intFromPtr(func);
+        this.r9 = @intFromPtr(args);
+    }
+
+    /// Piece of assembly used for context switch.
+    fn save_and_switch_raw() callconv(.naked) void {
+        asm volatile (
+        // save registers: r4-r7 and lr first, then r8-r12 copied through r4-r7 and lr
+            \\push {r4,r5,r6,r7,lr}
+            \\mov r4, r8
+            \\mov r5, r9
+            \\mov r6, r10
+            \\mov r7, r11
+            \\mov lr, r12
+            \\push {r4,r5,r6,r7,lr}
+            // store sp - 12 (presumably @sizeOf(PauseReason.OnStack), confirm) to [r0] and return it in r0; switch sp to r2
+            \\mov r4, sp
+            \\subs r4, r4, #12
+            \\str r4, [r0]
+            \\mov sp, r2
+            \\mov r0, r4
+            // load registers of the task being switched to; the final pop into pc continues it
+            \\pop {r2,r3,r4,r5,r6}
+            \\mov r8, r2
+            \\mov r9, r3
+            \\mov r10, r4
+            \\mov r11, r5
+            \\mov r12, r6
+            \\pop {r4,r5,r6,r7,pc}
+            ::: .{ .memory = true });
+    }
+};
+
+// TODO
+/// Placeholder context for 32-bit RISC-V targets.
+/// It has the same functions as `ContextArm`, but none of them is implemented.
+pub const ContextRV32 = struct {
+    /// Not implemented: always panics with "Unimplemented".
+    pub fn save_and_switch(save_to: **PauseReason, switch_to: *@This(), reason: *const PauseReason) void {
+        _ = .{ save_to, switch_to, reason };
+        std.debug.panic("Unimplemented", .{});
+    }
+
+    /// Not implemented: always panics with "Unimplemented".
+    pub fn init_launch(this: *@This(), F: type, func: *const F, args: *const std.meta.ArgsTuple(F)) void {
+        _ = .{ this, func, args };
+        std.debug.panic("Unimplemented", .{});
+    }
+};
+
+/// Context type of the current target, chosen at compile time from the CPU architecture and ABI.
+pub const Context = switch (builtin.target.cpu.arch) {
+    .thumb => switch (builtin.target.abi) {
+        .eabi, .eabihf => ContextArm,
+        // TODO: also save fpu registers
+        // .eabihf => ContextThumbFloat
+        else => |abi| @compileError("Unsupported abi: " ++ @tagName(abi)),
+    },
+    .riscv32 => switch (builtin.target.abi) {
+        .eabi => ContextRV32, // placeholder: its functions panic with "Unimplemented"
+        else => |abi| @compileError("Unsupported abi: " ++ @tagName(abi)),
+    },
+    else => |arch| @compileError("Unsupported architecture: " ++ @tagName(arch)),
+};
+
+/// The order in which those elements (saved context and pause reason) appear on the stack.
+const ContextAndReason = switch (stack_growth_direction) {
+    .up => extern struct { context: Context, reason: PauseReason.OnStack },
+    .down => extern struct { reason: PauseReason.OnStack, context: Context },
+};
+
+/// What `prepare_empty_stack` stores in a stack that holds no task:
+/// the pause reason slot (set to `.no_task`) and the length of the
+/// stack, counted in `StackUint`s.
+/// The field order follows the stack growth direction.
+/// `prepare_task_stack` expects `len` to be at the same place as in
+/// its own task layout.
+const EmptyStackLayout = switch (stack_growth_direction) {
+    .up => extern struct { len: usize, reason: PauseReason.OnStack },
+    .down => extern struct { reason: PauseReason.OnStack, len: usize },
+};
+
+/// Mark `stack` as empty and ready to launch a task; returns the slot that now holds `.no_task`.
+pub fn prepare_empty_stack(stack: []StackUint) *PauseReason {
+    const layout_len = @divExact(@sizeOf(EmptyStackLayout), @sizeOf(StackUint));
+    assert(stack.len >= layout_len); // the bookkeeping itself has to fit, otherwise the pointer below is out of bounds
+    const layout: *EmptyStackLayout = switch (stack_growth_direction) {
+        .up => @ptrCast(stack.ptr),
+        .down => @ptrCast(stack.ptr + stack.len - layout_len),
+    };
+    layout.len = stack.len; // remembered so that `prepare_task_stack` can locate its layout
+
+    const reason: *PauseReason = @ptrCast(&layout.reason);
+    reason.* = .no_task;
+    return reason;
+}
+
+/// Prepare an empty stack (one holding `.no_task`) for launching a new task running `f`; panics if it is in use.
+pub fn prepare_task_stack(comptime F: type, f: *const F, stack: *PauseReason) struct {
+    reason: *PauseReason,
+    args: *std.meta.ArgsTuple(F),
+} {
+    if (stack.context() != null) std.debug.panic("Stack needs to be empty!", .{});
+
+    const on_stack: *PauseReason.OnStack = @ptrCast(stack);
+    const empty_addr: *EmptyStackLayout = @fieldParentPtr("reason", on_stack);
+
+    const Args = std.meta.ArgsTuple(F);
+    const Result = @typeInfo(F).@"fn".return_type.?;
+    const size_result = if (Result == noreturn) 0 else @sizeOf(Result);
+    const ArgsResultUnion = [to_stack_units(@max(@sizeOf(Args), size_result))]StackUint;
+
+    const Layout = switch (stack_growth_direction) {
+        .up => extern struct {
+            len: usize,
+            args_ret: ArgsResultUnion,
+            cr: ContextAndReason,
+        },
+        .down => extern struct {
+            cr: ContextAndReason,
+            args_ret: ArgsResultUnion,
+            len: usize,
+        },
+    };
+    assert(empty_addr.len >= to_stack_units(@sizeOf(Layout))); // context, arguments and length must fit in the stack
+
+    // stack length is in the same place for both layouts.
+    const layout: *Layout = @fieldParentPtr("len", &empty_addr.len);
+    layout.cr.context.init_launch(F, f, @as(*const Args, @ptrCast(&layout.args_ret)));
+
+    return .{
+        .reason = @ptrCast(&layout.cr.reason),
+        .args = @ptrCast(&layout.args_ret),
+    };
+}
+
+/// Simple round-robin scheduler.
+pub const RoundRobin = struct {
+    next_swap: usize, // index in `tasks` at which the next search for a resumable task starts
+    tasks: []*PauseReason, // one pause reason pointer per stack; `.no_task` marks a free stack
+    vtable: VTable, // port-specific functionality
+
+    /// Pause the current task to allow others to run.
+    pub fn pause(this: *@This(), reason: *const PauseReason) void {
+        const i = blk: while (true) {
+            const next_swap = @min(this.next_swap, this.tasks.len);
+            for (next_swap..this.tasks.len) |i| {
+                if (this.tasks[i].can_resume(this)) break :blk i;
+            }
+            for (0..next_swap) |i| {
+                if (this.tasks[i].can_resume(this)) break :blk i;
+            }
+            if (reason.can_resume(this)) return; // no other task is ready but the caller is: keep running it
+        };
+        this.next_swap = i + 1; // the next search starts after the task being resumed
+        Context.save_and_switch(&this.tasks[i], this.tasks[i].context().?, reason); // the paused task takes over slot i
+    }
+
+    /// Add a task running `func` with `args` on the first free stack; panics when no stack is free.
+    pub fn async(this: *@This(), comptime func: anytype, args: std.meta.ArgsTuple(@TypeOf(func))) void {
+        for (this.tasks) |*task|
+            if (task.*.context() == null) {
+                const ptrs = prepare_task_stack(@TypeOf(func), &func, task.*);
+
+                ptrs.reason.* = .yield;
+                ptrs.args.* = args;
+                task.* = ptrs.reason;
+
+                return; // TODO: return future
+            };
+        // Maybe we could wait for them to complete instead?
+        std.debug.panic("Cannot launch more tasks.", .{});
+    }
+
+    pub fn monotonic_clock(this: *@This()) time.Absolute { // forwards to the port's clock in `vtable`
+        return this.vtable.monotonic_clock();
+    }
+
+    /// Perform memcpy with DMA. `dst` and `src` must have the same length (asserted).
+    pub fn dma_memcpy(this: *@This(), T: type, dst: []T, src: []const T) !DmaResult {
+        assert(dst.len == src.len);
+        return this.vtable.dma_memcpy(dst.ptr, src.ptr, dst.len * @sizeOf(T));
+    }
+};
+
+/// Handle for a DMA transfer, as returned by `dma_memcpy`. TODO: I hate this
+pub const DmaResult = struct {
+    await: *const fn (*@This(), *RoundRobin) void, // called with this result to wait for the transfer
+    channel: u32, // presumably the number of the DMA channel in use; set by the port
+};
+
+/// Common functionality between all implementations.
+/// Needs to be specified by every port.
+pub const VTable = struct {
+    /// A clock source that only ever goes up, not synchronized with epoch.
+    monotonic_clock: *const fn () time.Absolute,
+    dma_memcpy: *const fn (*anyopaque, *const anyopaque, usize) anyerror!DmaResult, // (dst, src, size in bytes)
+};
diff --git a/examples/raspberrypi/rp2xxx/build.zig b/examples/raspberrypi/rp2xxx/build.zig
index 404c76bbf..ef3fc947b 100644
--- a/examples/raspberrypi/rp2xxx/build.zig
+++ b/examples/raspberrypi/rp2xxx/build.zig
@@ -49,6 +49,7 @@ pub fn build(b: *std.Build) void {
 
     const chip_agnostic_examples: []const ChipAgnosticExample = &.{
         .{ .name = "adc", .file = "src/adc.zig" },
+        .{ .name = "async-blinky", .file = "src/async_blinky.zig" },
         .{ .name = "i2c-accel", .file = "src/i2c_accel.zig" },
         .{ .name = "i2c-bus-scan", .file = "src/i2c_bus_scan.zig" },
         .{ .name = "i2c-hall-effect", .file = "src/i2c_hall_effect.zig" },
@@ -79,20 +80,20 @@ pub fn build(b: *std.Build) void {
     available_examples.appendSlice(specific_examples) catch @panic("out of memory");
     for (chip_agnostic_examples) |example| {
         available_examples.append(.{
-            .target = mb.ports.rp2xxx.boards.raspberrypi.pico,
+            .target = raspberrypi.pico,
             .name = b.fmt("pico_{s}", .{example.name}),
             .file = example.file,
         }) catch @panic("out of memory");
 
         available_examples.append(.{
-            .target = mb.ports.rp2xxx.boards.raspberrypi.pico2_arm,
+            .target = raspberrypi.pico2_arm,
             .name = b.fmt("pico2_arm_{s}", .{example.name}),
             .file = example.file,
         }) catch @panic("out of memory");
 
         if (example.works_with_riscv) {
             available_examples.append(.{
-                .target = mb.ports.rp2xxx.boards.raspberrypi.pico2_riscv,
+                .target = raspberrypi.pico2_riscv,
                 .name = b.fmt("pico2_riscv_{s}", .{example.name}),
                 .file = example.file,
             }) catch @panic("out of memory");
diff --git a/examples/raspberrypi/rp2xxx/src/async_blinky.zig b/examples/raspberrypi/rp2xxx/src/async_blinky.zig
new file mode 100644
index 000000000..e29af56ab
--- /dev/null
+++ b/examples/raspberrypi/rp2xxx/src/async_blinky.zig
@@ -0,0 +1,68 @@
+const std = @import("std");
+const microzig = @import("microzig");
+const time = microzig.drivers.time;
+const Io = microzig.core.Io;
+
+const rp2xxx = microzig.hal;
+
+pub const microzig_options = microzig.Options{ // log through the UART logger at level `.info`
+    .log_level = .info,
+    .logFn = rp2xxx.uart.log,
+};
+
+const pin_config: rp2xxx.pins.GlobalConfiguration = .{
+    .GPIO0 = .{ .function = .UART0_TX },
+    .GPIO25 = .{ // output pin toggled by `task_blink` under the name `led`
+        .name = "led",
+        .direction = .out,
+    },
+};
+
+const pins = pin_config.pins();
+const uart = rp2xxx.uart.instance.num(0); // UART configured and used for logging in `main`
+
+// Blink the led forever: toggle it, then sleep until the next deadline.
+// Consecutive deadlines are `delay` microseconds apart (the half-period).
+fn task_blink(io: *Io.RoundRobin, delay: u32) callconv(.c) noreturn {
+    var wake_at: time.Absolute = io.monotonic_clock();
+    while (true) : (io.pause(&.{ .sleep_until = wake_at })) {
+        pins.led.toggle();
+        wake_at = wake_at.add_duration(.from_us(delay));
+    }
+}
+
+pub fn main() !void { // sets up logging and the scheduler, starts two blink tasks, runs a DMA demo, then logs once per second
+    pin_config.apply();
+    uart.apply(.{ .baud_rate = 1_000_000, .clock_config = rp2xxx.clock_config });
+    rp2xxx.uart.init_logger(uart);
+
+    // Set up stacks. A helper function that automates this would be nice.
+    const max_tasks = 8;
+    var task_stacks_data: [max_tasks][1024]usize = undefined; // 1024 words of stack per task
+    var task_stacks: [max_tasks]*Io.PauseReason = undefined;
+    for (&task_stacks, &task_stacks_data) |*dst, *src|
+        dst.* = Io.prepare_empty_stack(src); // every stack starts out as `.no_task`
+
+    var io: Io.RoundRobin = .{ .next_swap = 0, .tasks = &task_stacks, .vtable = rp2xxx.Io.vtable };
+
+    // Mixing (xoring) two squarewaves of almost the same frequency produces a beat frequency.
+    io.async(task_blink, .{ &io, 24_000 });
+    io.async(task_blink, .{ &io, 25_000 });
+
+    // DMA demo: using large arrays to prove waiting for transfer completion works.
+    const src: [1 << 15]u32 = @splat(1);
+    var dst: [1 << 15]u32 = @splat(0);
+    std.log.info("Before DMA: {any}", .{dst[dst.len - 16 ..]});
+    var future_dma = try io.dma_memcpy(u32, &dst, &src);
+    future_dma.await(&future_dma, &io); // pauses this task; other tasks may run meanwhile
+    std.log.info("After DMA: {any}", .{dst[dst.len - 16 ..]});
+
+    var deadline: time.Absolute = io.monotonic_clock();
+    var cnt: u32 = 0;
+    while (true) {
+        std.log.info("Hello! {}\r\n", .{cnt});
+        cnt += 1;
+        deadline = deadline.add_duration(.from_ms(1000));
+        io.pause(&.{ .sleep_until = deadline }); // lets the blink tasks run until the deadline is reached
+    }
+}
diff --git a/port/raspberrypi/rp2xxx/src/hal.zig b/port/raspberrypi/rp2xxx/src/hal.zig
index 56f887f02..831eb06e9 100644
--- a/port/raspberrypi/rp2xxx/src/hal.zig
+++ b/port/raspberrypi/rp2xxx/src/hal.zig
@@ -19,6 +19,7 @@ pub const dma = @import("hal/dma.zig");
 pub const drivers = @import("hal/drivers.zig");
 pub const flash = @import("hal/flash.zig");
 pub const gpio = @import("hal/gpio.zig");
+pub const Io = @import("hal/Io.zig");
 pub const multicore = @import("hal/multicore.zig");
 pub const mutex = @import("hal/mutex.zig");
 pub const pins = @import("hal/pins.zig");
diff --git a/port/raspberrypi/rp2xxx/src/hal/Io.zig b/port/raspberrypi/rp2xxx/src/hal/Io.zig
new file mode 100644
index 000000000..adeab8931
--- /dev/null
+++ b/port/raspberrypi/rp2xxx/src/hal/Io.zig
@@ -0,0 +1,31 @@
+const std = @import("std");
+const microzig = @import("microzig");
+const rp2xxx = @import("../hal.zig");
+const Io = microzig.core.Io;
+
+/// RP2xxx implementation of `Io.VTable`. See Io.VTable for function descriptions.
+pub const vtable: Io.VTable = .{
+    .monotonic_clock = rp2xxx.time.get_time_since_boot,
+    .dma_memcpy = dma_memcpy,
+};
+
+/// Start a DMA copy of `size` bytes from `src` to `dst` and return a handle to wait on.
+/// Fails with `error.NoDmaChannelAvailable` when `claim_unused_channel` returns null.
+/// NOTE(review): nothing visible here releases the claimed channel again; confirm.
+pub fn dma_memcpy(dst: *anyopaque, src: *const anyopaque, size: usize) !Io.DmaResult {
+    const channel = rp2xxx.dma.claim_unused_channel() orelse return error.NoDmaChannelAvailable;
+    try channel.setup_transfer(
+        @as([*]u8, @ptrCast(dst))[0..size],
+        @as([*]const u8, @ptrCast(src))[0..size],
+        .{ .trigger = true, .enable = true },
+    );
+    return .{ .await = &dma_await, .channel = @intFromEnum(channel) };
+}
+
+pub fn dma_await(result: *Io.DmaResult, io: *Io.RoundRobin) void { // pause the caller until the channel's BUSY bit reads 0
+    const channel = rp2xxx.dma.channel(@intCast(result.channel));
+    const ctrl = &channel.get_regs().ctrl_trig;
+    var mask: @TypeOf(ctrl.*).underlying_type = @bitCast(@as(u32, 0)); // start with no bit selected
+    mask.BUSY = 1;
+    io.pause(&.{ .bits_mask_all_low = .{ .ptr = @ptrCast(@volatileCast(ctrl)), .mask = @bitCast(mask) } }); // NOTE(review): volatile is cast away from a register pointer; confirm the polling load is never elided
+}
diff --git a/port/raspberrypi/rp2xxx/src/hal/dma.zig b/port/raspberrypi/rp2xxx/src/hal/dma.zig
index 37b60c0f1..8a25af6e0 100644
--- a/port/raspberrypi/rp2xxx/src/hal/dma.zig
+++ b/port/raspberrypi/rp2xxx/src/hal/dma.zig
@@ -183,15 +183,16 @@ pub const Channel = enum(u4) {
             inline fn get_addr(value: anytype) u32 {
                 const Type = @TypeOf(value);
                 const Info = @typeInfo(Type);
-                switch (Info) {
+                return switch (Info) { // a struct supplies its `addr` field, a pointer its own address
                     .@"struct" => {
                         return value.addr;
                     },
-                    .pointer => {
-                        return @intFromPtr(value);
+                    .pointer => |ptr| switch (ptr.size) {
+                        .one, .c, .many => @intFromPtr(value),
+                        .slice => @intFromPtr(value.ptr), // a slice is not an address itself: use its pointer
                     },
                     else => comptime unreachable,
-                }
+                };
             }
 
             inline fn get_dreq(value: anytype) Dreq {