|
| 1 | +from amaranth import * |
| 2 | +from amaranth.lib import enum, data, wiring, stream, io |
| 3 | +from amaranth.lib.wiring import In, Out |
| 4 | + |
| 5 | +from glasgow.gateware.ports import PortGroup |
| 6 | + |
| 7 | + |
# Public API of this module. `IOClocker` is a public component defined below alongside
# `IOStreamer`, so it is exported as well.
__all__ = ["IOStreamer", "IOClocker"]
| 9 | + |
| 10 | + |
def _filter_ioshape(direction, ioshape):
    """Decide whether a single ioshape entry is selected by a direction filter.

    `direction` is anything accepted by `io.Direction(...)`. `ioshape` is one entry of an
    ioshape mapping; only its first element (the entry's direction) is inspected.

    A bidirectional filter selects every entry. Otherwise an entry is selected when its own
    direction equals the filter or is bidirectional.
    """
    wanted = io.Direction(direction)
    if wanted is not io.Direction.Bidir:
        actual = io.Direction(ioshape[0])
        return actual in (wanted, io.Direction.Bidir)
    # A bidirectional filter matches everything without looking at the entry.
    return True
| 16 | + |
| 17 | + |
def _iter_ioshape(direction, ioshape, *args):  # actually filter+iter
    """Iterate over the ioshape entries selected by `direction`, in mapping order.

    For every selected name, yields a tuple holding `arg[name]` for each of `args`, so that
    several name-indexed containers can be walked in lockstep.
    """
    yield from (
        tuple(container[name] for container in args)
        for name, entry in ioshape.items()
        if _filter_ioshape(direction, entry)
    )
| 22 | + |
| 23 | + |
def _map_ioshape(direction, ioshape, fn):  # actually filter+map
    """Build a struct layout from the ioshape entries selected by `direction`.

    Each selected entry contributes a member with the same name, whose shape is the result of
    calling `fn` on the entry's second element (its width).
    """
    members = {}
    for name, entry in ioshape.items():
        if _filter_ioshape(direction, entry):
            members[name] = fn(entry[1])
    return data.StructLayout(members)
| 28 | + |
| 29 | + |
class SimulatableDDRBuffer(io.DDRBuffer):
    """`io.DDRBuffer` variant that can also be elaborated on top of an `io.SimulationPort`.

    For any other kind of port the stock `io.DDRBuffer` elaboration is used unchanged. For a
    simulation port, the buffer is modelled with ordinary flip-flops around a plain `io.Buffer`,
    using a local clock domain driven by the inverted clock for the second half of each cycle.
    """

    def elaborate(self, platform):
        # Anything that is not a simulation port is handled by the parent class.
        if not isinstance(self._port, io.SimulationPort):
            return super().elaborate(platform)

        # At the time of writing, Amaranth's DDRBuffer doesn't allow for simulation; this
        # implements ICE40 semantics for simulation.
        m = Module()

        m.submodules.io_buffer = io_buffer = io.Buffer(self.direction, self.port)

        if self.direction is not io.Direction.Output:
            # Local domain clocked by the inverted input-domain clock.
            m.domains.i_domain_negedge = ClockDomain("i_domain_negedge", local=True)
            m.d.comb += ClockSignal("i_domain_negedge").eq(~ClockSignal(self.i_domain))
            i_ff = Signal(len(self.port), reset_less=True)
            i_negedge_ff = Signal(len(self.port), reset_less=True)
            i_final_ff = Signal(data.ArrayLayout(len(self.port), 2), reset_less=True)
            # The pin is sampled once in each of the two domains...
            m.d[self.i_domain] += i_ff.eq(io_buffer.i)
            m.d["i_domain_negedge"] += i_negedge_ff.eq(io_buffer.i)
            # ... and both samples are re-registered together in `i_domain`, so that `i[0]`
            # (from `i_ff`) and `i[1]` (from `i_negedge_ff`) change at the same time.
            m.d[self.i_domain] += i_final_ff.eq(Cat(i_ff, i_negedge_ff))
            m.d.comb += self.i.eq(i_final_ff)

        if self.direction is not io.Direction.Input:
            # Local domain clocked by the inverted output-domain clock.
            m.domains.o_domain_negedge = ClockDomain("o_domain_negedge", local=True)
            m.d.comb += ClockSignal("o_domain_negedge").eq(~ClockSignal(self.o_domain))
            o_ff = Signal(len(self.port), reset_less=True)
            o_negedge_ff = Signal(len(self.port), reset_less=True)
            oe_ff = Signal(reset_less=True)
            # The pin is driven with `o_ff ^ o_negedge_ff` (see below). Loading `o_ff` with
            # `o[0] ^ o_negedge_ff` makes that XOR equal to `o[0]` once `o_ff` updates.
            m.d[self.o_domain] += o_ff.eq(self.o[0] ^ o_negedge_ff)
            # `o[1]` is captured in `o_domain` first, then folded in on the inverted clock:
            # loading `o_negedge_ff` with `o1_ff ^ o_ff` makes the XOR equal to `o1_ff`.
            o1_ff = Signal(len(self.port), reset_less=True)
            m.d[self.o_domain] += o1_ff.eq(self.o[1])
            m.d["o_domain_negedge"] += o_negedge_ff.eq(o1_ff ^ o_ff)
            # The output enable is registered once in `o_domain`.
            m.d[self.o_domain] += oe_ff.eq(self.oe)
            m.d.comb += io_buffer.o.eq(o_ff ^ o_negedge_ff)
            m.d.comb += io_buffer.oe.eq(oe_ff)

        return m
| 67 | + |
| 68 | + |
class IOStreamer(wiring.Component):
    """I/O buffer to stream adapter.

    This adapter instantiates I/O buffers for a port (FF or DDR) and connects them to a pair of
    streams, one for the outputs of the buffers and one for the inputs. Whenever an `o_stream`
    transfer occurs, the state of the output is updated _t1_ cycles later; if `o_stream.p.i_en`
    is set, then _t2_ cycles later, a payload with the data captured at the same time as
    the outputs were updated appears on `i_stream.p.i`.

    Arbitrary ancillary data may be provided with `o_stream` transfers via `o_stream.p.meta`,
    and this data will be relayed back as `i_stream.p.meta` with the output-to-input latency
    of the buffer. Higher-level protocol engines can use this data to indicate how the inputs
    must be processed without needing counters or state machines on a higher level to match
    the latency (and, usually, without needing any knowledge of the latency at all).

    On reset, output ports have their drivers enabled, and bidirectional ports have them disabled.
    All of the signals are deasserted, which could be a low or a high level depending on the port
    polarity.

    Constructor arguments:

    * `ioshape`: mapping from pin name to a `(direction, width)` pair. (An `int` passes the
      assertion as well. NOTE(review): the helpers used here call `.items()` on it, so that
      form looks unsupported; confirm.)
    * `ports`: an `io.PortLike` or a `PortGroup` whose attributes are named like the `ioshape`
      entries.
    * `ratio`: 1 for `io.FFBuffer`, 2 for DDR buffers.
    * `init`: initial value of the internal output latch.
    * `meta_layout`: shape of the `meta` field carried by both streams.
    """

    @staticmethod
    def o_stream_signature(ioshape, /, *, ratio=1, meta_layout=0):
        """Return the signature of `o_stream`.

        The payload has one `port.<name>` member (with `o` and `oe` fields) per entry selected
        by the `"o"` direction filter, plus `i_en` and `meta`. With `ratio != 1`, `o` is an
        array of `ratio` elements.
        """
        return stream.Signature(data.StructLayout({
            "port": _map_ioshape("o", ioshape, lambda width: data.StructLayout({
                "o":  width if ratio == 1 else data.ArrayLayout(width, ratio),
                "oe": 1,
            })),
            "i_en": 1,
            "meta": meta_layout,
        }))

    @staticmethod
    def i_stream_signature(ioshape, /, *, ratio=1, meta_layout=0):
        """Return the signature of `i_stream`.

        The payload has one `port.<name>` member (with an `i` field) per entry selected by the
        `"i"` direction filter, plus `meta`. With `ratio != 1`, `i` is an array of `ratio`
        elements.
        """
        return stream.Signature(data.StructLayout({
            "port": _map_ioshape("i", ioshape, lambda width: data.StructLayout({
                "i": width if ratio == 1 else data.ArrayLayout(width, ratio),
            })),
            "meta": meta_layout,
        }))

    def __init__(self, ioshape, ports, /, *, ratio=1, init=None, meta_layout=0):
        assert isinstance(ioshape, (int, dict))
        assert ratio in (1, 2)

        self._ioshape = ioshape
        self._ports   = ports
        self._ratio   = ratio
        self._init    = init

        super().__init__({
            "o_stream": In(self.o_stream_signature(ioshape, ratio=ratio, meta_layout=meta_layout)),
            "i_stream": Out(self.i_stream_signature(ioshape, ratio=ratio, meta_layout=meta_layout)),
        })

    def elaborate(self, platform):
        m = Module()

        # `latency` is the number of `sync` cycles by which `i_en` and `meta` are delayed before
        # they are paired with the buffer inputs.
        if self._ratio == 1:
            buffer_cls, latency = io.FFBuffer, 1
        if self._ratio == 2:
            # FIXME: should this be 2 or 3? the latency differs between i[0] and i[1]
            buffer_cls, latency = SimulatableDDRBuffer, 3

        if isinstance(self._ports, io.PortLike):
            m.submodules.buffer = buffer = buffer_cls("io", self._ports)
        if isinstance(self._ports, PortGroup):
            # One buffer per pin, with the direction taken from the matching ioshape entry.
            buffer = {}
            for name, sub_port in self._ports.__dict__.items():
                direction, _width = self._ioshape[name]
                m.submodules[f"buffer_{name}"] = buffer[name] = buffer_cls(direction, sub_port)

        # Holds the most recently transferred output state so that it keeps being driven while
        # no `o_stream` transfer takes place.
        o_latch = Signal(_map_ioshape("o", self._ioshape, lambda width: data.StructLayout({
            "o":  width,
            "oe": 1,
        })), init=self._init)
        with m.If(self.o_stream.valid & self.o_stream.ready):
            # On a transfer, the payload is fed to the buffers directly...
            for buffer_parts, stream_parts in _iter_ioshape("o", self._ioshape,
                    buffer, self.o_stream.p.port):
                m.d.comb += buffer_parts.o.eq(stream_parts.o)
                m.d.comb += buffer_parts.oe.eq(stream_parts.oe)
            # ... and remembered in the latch. With DDR, only the last element is kept.
            for latch_parts, stream_parts in _iter_ioshape("o", self._ioshape,
                    o_latch, self.o_stream.p.port):
                if self._ratio == 1:
                    m.d.sync += latch_parts.o.eq(stream_parts.o)
                else:
                    m.d.sync += latch_parts.o.eq(stream_parts.o[-1])
                m.d.sync += latch_parts.oe.eq(stream_parts.oe)
        with m.Else():
            # Without a transfer, the latched state is replayed to the buffers.
            for buffer_parts, latch_parts in _iter_ioshape("o", self._ioshape,
                    buffer, o_latch):
                if self._ratio == 1:
                    m.d.comb += buffer_parts.o.eq(latch_parts.o)
                else:
                    m.d.comb += buffer_parts.o.eq(latch_parts.o.replicate(self._ratio))
                m.d.comb += buffer_parts.oe.eq(latch_parts.oe)

        def delay(value, name):
            # Delay `value` by `latency` cycles of the `sync` domain.
            for stage in range(latency):
                next_value = Signal.like(value, name=f"{name}_{stage}")
                m.d.sync += next_value.eq(value)
                value = next_value
            return value

        i_en = delay(self.o_stream.valid & self.o_stream.ready &
                     self.o_stream.p.i_en, name="i_en")
        meta = delay(self.o_stream.p.meta, name="meta")

        # This skid buffer is organized as a shift register to avoid any uncertainties associated
        # with the use of an async read memory. On platforms that have LUTRAM, this implementation
        # may be slightly worse than using LUTRAM, and may have to be revisited in the future.
        #
        # `skid[0]` is the live sample (combinational); `skid[1]` to `skid[skid_at]` hold samples
        # that could not be delivered yet, with the oldest one at index `skid_at`.
        skid = Array(Signal(self.i_stream.payload.shape(), name=f"skid_{stage}")
                     for stage in range(1 + latency))
        for skid_parts, buffer_parts in _iter_ioshape("i", self._ioshape, skid[0].port, buffer):
            m.d.comb += skid_parts.i.eq(buffer_parts.i)
        m.d.comb += skid[0].meta.eq(meta)

        skid_at = Signal(range(1 + latency))
        with m.If(i_en):
            # Every arriving sample enters the shift register, whether or not the consumer is
            # ready. If the consumer accepts a buffered sample in the same cycle, the new sample
            # takes the freed slot and `skid_at` stays unchanged. (Previously the shift only
            # happened when the consumer was not ready, so a sample arriving while a buffered
            # one was being drained was dropped; with a latency above 1, samples can still
            # arrive after `o_stream.ready` has been deasserted.) When nothing is buffered and
            # the consumer is ready, the shifted copy is simply never read.
            for n_shift in range(latency):
                m.d.sync += skid[n_shift + 1].eq(skid[n_shift])
        with m.If(i_en & ~self.i_stream.ready):
            # One more sample is waiting.
            # m.d.sync += Assert(skid_at != latency)
            m.d.sync += skid_at.eq(skid_at + 1)
        with m.Elif(~i_en & (skid_at != 0) & self.i_stream.ready):
            # A buffered sample was consumed and no new one took its place.
            m.d.sync += skid_at.eq(skid_at - 1)

        m.d.comb += self.i_stream.payload.eq(skid[skid_at])
        m.d.comb += self.i_stream.valid.eq(i_en | (skid_at != 0))
        # New output transfers are only accepted while nothing is buffered and the consumer is
        # ready.
        m.d.comb += self.o_stream.ready.eq(self.i_stream.ready & (skid_at == 0))

        return m
| 199 | + |
| 200 | + |
class IOClocker(wiring.Component):
    """Clock generator that sits in front of an `IOStreamer`.

    Payloads received on `i_stream` are forwarded to `o_stream`, whose signature is the same as
    `IOStreamer.o_stream_signature`. Unless a payload has `bypass` set, the output named by
    `clock` is replaced with a generated clock: each input payload is emitted once with the
    clock low and once with it high, input sampling (`i_en`) is requested only for the high
    phase, and the `divisor` input paces how often payloads are emitted.

    Constructor arguments:

    * `ioshape`: mapping from pin name to a `(direction, width)` pair; must contain `clock`.
    * `clock`: name of the pin that carries the generated clock.
    * `o_ratio`: 1 or 2; ratio of the `o_stream` payload (2 for DDR outputs).
    * `meta_layout`: shape of the `meta` field carried by both streams.
    * `divisor_width`: width of the `divisor` input.
    """

    @staticmethod
    def i_stream_signature(ioshape, /, *, _ratio=1, meta_layout=0):
        """Return the signature of `i_stream`: the output payload plus a `bypass` bit."""
        # Currently the only supported ratio is 1, but this will change in the future for
        # interfaces like HyperBus.
        return stream.Signature(data.StructLayout({
            "bypass": 1,
            "port": _map_ioshape("o", ioshape, lambda width: data.StructLayout({
                "o":  width if _ratio == 1 else data.ArrayLayout(width, _ratio),
                "oe": 1,
            })),
            "i_en": 1,
            "meta": meta_layout,
        }))

    @staticmethod
    def o_stream_signature(ioshape, /, *, ratio=1, meta_layout=0):
        """Return the signature of `o_stream`, identical to `IOStreamer.o_stream_signature`."""
        return IOStreamer.o_stream_signature(ioshape, ratio=ratio, meta_layout=meta_layout)

    def __init__(self, ioshape, *, clock, o_ratio=1, meta_layout=0, divisor_width=16):
        assert isinstance(ioshape, dict)
        assert isinstance(clock, str)
        assert o_ratio in (1, 2)
        assert clock in ioshape

        self._clock   = clock
        self._ioshape = ioshape
        self._o_ratio = o_ratio

        super().__init__({
            "i_stream": In(self.i_stream_signature(ioshape,
                meta_layout=meta_layout)),
            "o_stream": Out(self.o_stream_signature(ioshape,
                ratio=o_ratio, meta_layout=meta_layout)),

            # f_clk = f_sync if (o_ratio == 2 and divisor == 0) else f_sync / (2 * max(1, divisor))
            "divisor": In(divisor_width),
        })

    def elaborate(self, platform):
        m = Module()

        # Forward the inputs to the outputs as-is. This includes the clock; it is overridden below
        # if the clocker is used (not bypassed).
        #
        # Both stream payloads only contain the pins selected by the "o" filter (see the
        # signatures above), so the same filter must be used here. Filtering with "io" selects
        # every entry, including input-only pins that have no field in either payload.
        for i_parts, o_parts in _iter_ioshape("o", self._ioshape,
                self.i_stream.p.port, self.o_stream.p.port):
            m.d.comb += o_parts.o.eq(i_parts.o.replicate(self._o_ratio))
            m.d.comb += o_parts.oe.eq(i_parts.oe)
        m.d.comb += self.o_stream.p.i_en.eq(self.i_stream.p.i_en)
        m.d.comb += self.o_stream.p.meta.eq(self.i_stream.p.meta)

        phase = Signal()
        # If the clocker is used...
        with m.If(~self.i_stream.p.bypass):
            # ... ignore the clock in the inputs and replace it with the generated one...
            if self._o_ratio == 1:
                m.d.comb += self.o_stream.p.port[self._clock].o.eq(phase)
            if self._o_ratio == 2:
                m.d.comb += self.o_stream.p.port[self._clock].o.eq(Cat(~phase, phase))
            m.d.comb += self.o_stream.p.port[self._clock].oe.eq(1)
            # ... while requesting input sampling only for the rising edge. (Interfaces triggering
            # transfers on falling edge will be inverting the clock at the `IOPort` level.)
            m.d.comb += self.o_stream.p.i_en.eq(self.i_stream.p.i_en & phase)

        timer = Signal.like(self.divisor)
        with m.If((timer == 0) | (timer == 1)):
            # Only produce output when the timer has expired. This ensures that no clock pulse
            # exceeds the frequency set by `divisor`, except the ones that bypass the clocker.
            m.d.comb += self.o_stream.valid.eq(self.i_stream.valid)

            with m.FSM():
                with m.State("Falling"):
                    with m.If(self.i_stream.p.bypass):  # Bypass the clocker entirely.
                        m.d.comb += self.i_stream.ready.eq(self.o_stream.ready)

                    with m.Else():  # Produce a falling edge at the output.
                        # Whenever DDR output is used, `phase == 1` outputs a low state first and
                        # a high state second. When `phase == 1` payloads are output back to back
                        # (in DDR mode only!) this generates a pulse train with data changes
                        # coinciding with the falling edges. Setting `divisor == 0` in this mode
                        # allows clocking the peripheral at the `sync` frequency.
                        with m.If((self._o_ratio == 2) & (self.divisor == 0)):
                            m.d.comb += phase.eq(1)
                            with m.If(self.o_stream.ready):
                                m.d.comb += self.i_stream.ready.eq(1)
                        with m.Else():
                            # Emit the low phase without consuming the input payload; it is
                            # consumed in the "Rising" state.
                            m.d.comb += phase.eq(0)
                            with m.If(self.o_stream.ready & self.i_stream.valid):
                                m.d.sync += timer.eq(self.divisor)
                                m.next = "Rising"

                with m.State("Rising"):
                    # Emit the high phase and consume the input payload.
                    m.d.comb += phase.eq(1)
                    with m.If(self.o_stream.ready):
                        m.d.comb += self.i_stream.ready.eq(1)
                        m.d.sync += timer.eq(self.divisor)
                        m.next = "Falling"

        with m.Else():
            # The timer has not expired yet: keep counting down and produce no output.
            m.d.sync += timer.eq(timer - 1)

        return m
0 commit comments