/// Describes why a task is paused, and therefore when it may be resumed.
/// This could be made into an interface.
/// Upside: Much easier to move more functionality into core.
/// Downside: Performance (may be mitigated once https://github.com/ziglang/zig/issues/23367 is implemented).
pub const PauseReason = union(enum) {
    /// Storage for one `PauseReason`, measured in stack-aligned words.
    const OnStack = [to_stack_units(@sizeOf(@This()))]StackUint;
    /// A word to poll, together with the bits of interest inside it.
    const PtrMask = struct { ptr: *const usize, mask: usize };

    /// Task voluntarily gave up execution, but is ready to continue.
    yield,
    /// Resumable once `is_reached_by` reports true for the scheduler's monotonic clock.
    sleep_until: time.Absolute align(@alignOf(StackUint)),
    /// Resumable once every masked bit of the pointed-to word reads as 0.
    bits_mask_all_low: PtrMask,
    /// Resumable once at least one masked bit of the pointed-to word reads as 1.
    bits_mask_any_high: PtrMask,
    /// This value means there is no context stored on this stack
    /// so it can be used to launch a new task.
    no_task,

    comptime {
        assert(@alignOf(@This()) <= @alignOf(StackUint));
    }

    /// Check if the task should be resumed.
    /// `io` only needs to provide `monotonic_clock()`; it is consulted for
    /// `.sleep_until` and ignored otherwise. The io interface may not be necessary.
    pub fn can_resume(this: *const @This(), io: anytype) bool {
        switch (this.*) {
            .yield => return true,
            .no_task => return false,
            .sleep_until => |deadline| return deadline.is_reached_by(io.monotonic_clock()),
            .bits_mask_all_low => |watch| {
                const word = @atomicLoad(usize, watch.ptr, .acquire);
                return (word & watch.mask) == 0;
            },
            .bits_mask_any_high => |watch| {
                const word = @atomicLoad(usize, watch.ptr, .acquire);
                return (word & watch.mask) != 0;
            },
        }
    }

    /// Returns the context of this task, assuming that the pause reason
    /// is stored just beyond the end of the stack (i.e. inside a `ContextAndReason`).
    /// Returns null for `.no_task`, where no context has been saved.
    pub fn context(this: *@This()) ?*Context {
        return switch (this.*) {
            .no_task => null,
            else => found: {
                const storage: *OnStack = @ptrCast(this);
                const pair: *ContextAndReason = @fieldParentPtr("reason", storage);
                break :found &pair.context;
            },
        };
    }
};
/// Switches context to `switch_to` and then stores `reason` just beyond the stack.
/// The address at which `reason` was stored is saved to `save_to`.
/// By doing it this way we can store all information that a scheduler needs on top of the task stack.
/// This may be made generic so that other schedulers can save any data.
pub fn save_and_switch(save_to: **PauseReason, switch_to: *@This(), reason: *const PauseReason) void {
    // `save_and_switch_raw` is a naked function, so it is called through a pointer
    // cast to the C signature its assembly expects. Note the argument order differs
    // from this function's: the assembly stores through the first argument, leaves
    // the second untouched and loads `sp` from the third.
    const raw: *const fn (
        **PauseReason,
        *const PauseReason,
        *@This(),
    ) callconv(.c) SaveResult =
        @ptrCast(&save_and_switch_raw);

    const ret = raw(save_to, reason, switch_to);
    // `raw` "returns" into whichever task was switched to, so `ret` describes the
    // task that has just been paused: copy its pause reason into the slot reserved
    // just beyond its saved context.
    ret.previous_reason.* = ret.previous_pause_reason.*;
}

/// Prepares the saved context registers to launch a new task.
/// In this implementation, the same mechanism as in a context switch is used.
/// The context switch assembly 'just so happens' to temporarily store some of the
/// context in registers used for argument passing (arguments 3 and 4).
/// Arguments 1 and 2 correspond to the return value, so they contain
/// what would be the return value of `save_and_switch`.
///
/// `args` is dereferenced when the task first runs, not here, so the pointed-to
/// tuple has to stay valid until then.
pub fn init_launch(this: *@This(), F: type, func: *const F, args: *const std.meta.ArgsTuple(F)) void {
    const LaunchTask = struct {
        // ABI arguments 1 and 2 are used for the two structure fields.
        fn launch_task(ret: SaveResult, f: *const F, a: @TypeOf(args)) callconv(.c) void {
            // Same bookkeeping as at the end of `save_and_switch`: store the pause
            // reason of the task that switched to this freshly launched one.
            ret.previous_reason.* = ret.previous_pause_reason.*;

            _ = @call(.auto, f, a.*);

            // TODO: futures and return values
            std.debug.panic("task returned", .{});
        }
    };

    // The context switch ends by popping `pc`, so this becomes the task's entry point.
    this.pc = @intFromPtr(&LaunchTask.launch_task);
    // r8 corresponds to argument 3, r9 to arg 4.
    this.r8 = @intFromPtr(func);
    this.r9 = @intFromPtr(args);
}
/// Piece of assembly used for context switch.
/// Never call this directly: `save_and_switch` calls it through a pointer cast to
/// `fn (**PauseReason, *const PauseReason, *Context) callconv(.c) SaveResult`,
/// so on entry r0 = `save_to`, r1 = `reason`, r2 = `switch_to`
/// (assuming the standard ARM C calling convention — TODO confirm).
fn save_and_switch_raw() callconv(.naked) void {
    asm volatile (
    // save registers
    // First r4-r7 and the return address, then r8-r12 copied through low registers
    // (presumably because this `push` encoding only accepts low registers and lr).
    // The resulting memory order, lowest address first, is r8..r12, r4..r7, pc,
    // which is exactly the field order of `ContextArm`.
        \\push {r4,r5,r6,r7,lr}
        \\mov r4, r8
        \\mov r5, r9
        \\mov r6, r10
        \\mov r7, r11
        \\mov lr, r12
        \\push {r4,r5,r6,r7,lr}
        // switch sp
        // r4 = address just below the saved context, where the pause reason will live.
        // NOTE(review): 12 is presumably @sizeOf(PauseReason.OnStack); it is
        // hard-coded here and must be kept in sync by hand.
        \\mov r4, sp
        \\subs r4, r4, #12
        // *save_to = r4, then continue on the stack of the task being resumed.
        \\str r4, [r0]
        \\mov sp, r2
        // r0 is the first half of the SaveResult; r1 still holds `reason` and is
        // the second half.
        \\mov r0, r4
        // load registers
        // r2 and r3 keep the loaded r8/r9 values, which `init_launch` relies on to
        // pass arguments 3 and 4 to a freshly launched task.
        \\pop {r2,r3,r4,r5,r6}
        \\mov r8, r2
        \\mov r9, r3
        \\mov r10, r4
        \\mov r11, r5
        \\mov r12, r6
        \\pop {r4,r5,r6,r7,pc}
        ::: .{ .memory = true });
}
/// Mark stack as empty and ready to launch a task.
///
/// Writes an `EmptyStackLayout` (the stack length in words plus a `.no_task`
/// pause reason) at the end of `stack` that a task would start from, and returns
/// a pointer to that pause reason. The returned pointer is what the scheduler
/// keeps in its task list and what `prepare_task_stack` expects.
///
/// Panics if `stack` is too short to hold the `EmptyStackLayout` bookkeeping.
pub fn prepare_empty_stack(stack: []StackUint) *PauseReason {
    const layout_len = @divExact(@sizeOf(EmptyStackLayout), @sizeOf(StackUint));
    // Without this check the pointer arithmetic below would land before the start
    // of the buffer and the stores would corrupt unrelated memory.
    if (stack.len < layout_len)
        std.debug.panic("Stack too small!", .{});

    const layout: *EmptyStackLayout = switch (stack_growth_direction) {
        .up => @ptrCast(stack.ptr),
        .down => @ptrCast(stack.ptr + stack.len - layout_len),
    };

    const reason: *PauseReason = @ptrCast(&layout.reason);
    layout.len = stack.len;
    reason.* = .no_task;
    return reason;
}
/// Simple round-robin scheduler.
pub const RoundRobin = struct {
    /// Index in `tasks` at which the next search for a resumable task starts.
    next_swap: usize,
    /// One slot per task stack; each points at the pause reason stored on that stack.
    tasks: []*PauseReason,
    /// Port-specific functionality.
    vtable: VTable,

    /// Returns the index of the first task in `tasks[from..to]` that can be resumed.
    fn first_resumable(this: *@This(), from: usize, to: usize) ?usize {
        for (from..to) |idx| {
            if (this.tasks[idx].can_resume(this)) return idx;
        }
        return null;
    }

    /// Pause the current task to allow others to run.
    /// Tasks are polled starting at `next_swap` and wrapping around; if none is
    /// ready and `reason` is already satisfied, this returns without switching.
    /// Otherwise it keeps polling.
    pub fn pause(this: *@This(), reason: *const PauseReason) void {
        while (true) {
            const start = @min(this.next_swap, this.tasks.len);
            const found = this.first_resumable(start, this.tasks.len) orelse
                this.first_resumable(0, start);

            if (found) |idx| {
                this.next_swap = idx + 1;
                const slot = &this.tasks[idx];
                // The current task's pause reason takes over the slot of the task
                // being resumed.
                Context.save_and_switch(slot, slot.*.context().?, reason);
                return;
            }

            if (reason.can_resume(this)) return;
        }
    }

    /// Add a task.
    /// The task is placed on the first empty stack and marked as ready to run.
    /// Panics when every stack is already in use.
    pub fn async(this: *@This(), comptime func: anytype, args: std.meta.ArgsTuple(@TypeOf(func))) void {
        for (this.tasks) |*slot| {
            if (slot.*.context() != null) continue;

            const launch = prepare_task_stack(@TypeOf(func), &func, slot.*);
            launch.args.* = args;
            launch.reason.* = .yield;
            slot.* = launch.reason;

            return; // TODO: return future
        }
        // Maybe we could wait for them to complete instead?
        std.debug.panic("Cannot launch more tasks.", .{});
    }

    /// Current value of the port's monotonic clock.
    pub fn monotonic_clock(this: *@This()) time.Absolute {
        const read_clock = this.vtable.monotonic_clock;
        return read_clock();
    }

    /// Perform memcpy with DMA. `dst` and `src` must have the same length.
    pub fn dma_memcpy(this: *@This(), T: type, dst: []T, src: []const T) !DmaResult {
        assert(dst.len == src.len);
        const byte_count = dst.len * @sizeOf(T);
        return this.vtable.dma_memcpy(dst.ptr, src.ptr, byte_count);
    }
};
/// Common functionality between all implementations.
/// Needs to be specified by every port.
pub const VTable = struct {
    /// A clock source that only ever goes up, not synchronized with epoch.
    monotonic_clock: *const fn () time.Absolute,
    /// Backend for `RoundRobin.dma_memcpy`, which passes the destination pointer,
    /// the source pointer and the transfer size in bytes, in that order.
    dma_memcpy: *const fn (*anyopaque, *const anyopaque, usize) anyerror!DmaResult,
};
/// Demo entry point: launches two blinking tasks, runs one DMA copy while they
/// are scheduled, then logs a greeting once per second forever.
pub fn main() !void {
    pin_config.apply();
    uart.apply(.{ .baud_rate = 1_000_000, .clock_config = rp2xxx.clock_config });
    rp2xxx.uart.init_logger(uart);

    // Set up stacks. A helper function that automates this would be nice.
    const max_tasks = 8;
    var stack_storage: [max_tasks][1024]usize = undefined;
    var stack_slots: [max_tasks]*Io.PauseReason = undefined;
    for (&stack_storage, 0..) |*words, slot| {
        stack_slots[slot] = Io.prepare_empty_stack(words);
    }

    var io: Io.RoundRobin = .{
        .next_swap = 0,
        .tasks = &stack_slots,
        .vtable = rp2xxx.Io.vtable,
    };

    // Mixing (xoring) two squarewaves of almost the same frequency produces a beat frequency.
    io.async(task_blink, .{ &io, 24_000 });
    io.async(task_blink, .{ &io, 25_000 });

    // DMA demo: using large arrays to prove waiting for transfer completion works.
    const src: [1 << 15]u32 = @splat(1);
    var dst: [1 << 15]u32 = @splat(0);
    std.log.info("Before DMA: {any}", .{dst[dst.len - 16 ..]});
    var transfer = try io.dma_memcpy(u32, &dst, &src);
    transfer.await(&transfer, &io);
    std.log.info("After DMA: {any}", .{dst[dst.len - 16 ..]});

    // The main task is scheduled like any other: it sleeps between greetings.
    var wake_at: time.Absolute = io.monotonic_clock();
    var greetings: u32 = 0;
    while (true) {
        std.log.info("Hello! {}\r\n", .{greetings});
        greetings += 1;
        wake_at = wake_at.add_duration(.from_ms(1000));
        io.pause(&.{ .sleep_until = wake_at });
    }
}
/// Resolve `value` to the 32-bit address used for a DMA transfer.
///
/// Accepted argument kinds (anything else is a compile error):
/// - a struct: its `addr` field is returned;
/// - a single-item, many-item or C pointer: the pointer's address;
/// - a slice: the address of its first element.
inline fn get_addr(value: anytype) u32 {
    return switch (@typeInfo(@TypeOf(value))) {
        // Must be a plain expression: a `{ value.addr; }` block would discard the
        // field and yield `void` instead of the address.
        .@"struct" => value.addr,
        .pointer => |ptr| switch (ptr.size) {
            .one, .c, .many => @intFromPtr(value),
            .slice => @intFromPtr(value.ptr),
        },
        else => comptime unreachable,
    };
}