Skip to content

Commit 2ac6516

Browse files
committed
Import Glasgow's IOStreamer
1 parent d91e09b commit 2ac6516

File tree

2 files changed

+505
-0
lines changed

2 files changed

+505
-0
lines changed

chipflow_lib/platforms/iostream.py

Lines changed: 302 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,302 @@
1+
from amaranth import *
2+
from amaranth.lib import enum, data, wiring, stream, io
3+
from amaranth.lib.wiring import In, Out
4+
5+
from glasgow.gateware.ports import PortGroup
6+
7+
8+
# Names exported by a star-import of this module.
# NOTE(review): `IOClocker` is defined in this file but is not listed here -- confirm whether
# leaving it out of the public export list is intentional.
__all__ = ["IOStreamer"]
9+
10+
11+
def _filter_ioshape(direction, ioshape):
    """Tell whether one ioshape entry takes part in a stream of the given direction.

    ``direction`` is anything ``io.Direction`` accepts. ``ioshape`` is a single entry whose
    first element is that entry's own direction. A bidirectional request selects every entry;
    otherwise an entry is selected when its own direction equals the requested one or is
    bidirectional.
    """
    wanted = io.Direction(direction)
    if wanted is not io.Direction.Bidir:
        entry_direction = io.Direction(ioshape[0])
        return entry_direction in (wanted, io.Direction.Bidir)
    return True
16+
17+
18+
def _iter_ioshape(direction, ioshape, *args): # actually filter+iter
    """Yield, for every ioshape entry selected for ``direction``, the matching parts of ``args``.

    ``ioshape`` maps entry names to shape entries. Each object in ``args`` is indexed by the
    entry name, and one tuple (with one element per object in ``args``) is produced per selected
    entry, in ``ioshape`` iteration order. Entries rejected by ``_filter_ioshape`` are skipped.
    """
    for name, item in ioshape.items():
        # Use the value already bound by `.items()` instead of looking it up by key again.
        if _filter_ioshape(direction, item):
            yield tuple(arg[name] for arg in args)
22+
23+
24+
def _map_ioshape(direction, ioshape, fn): # actually filter+map
    """Build a struct layout with one field per ioshape entry selected for ``direction``.

    For every selected entry, ``fn`` is called with the entry's second element and its result
    becomes the shape of the field named after the entry. Entries rejected by
    ``_filter_ioshape`` produce no field.
    """
    fields = {}
    for name, item in ioshape.items():
        if not _filter_ioshape(direction, item):
            continue
        fields[name] = fn(item[1])
    return data.StructLayout(fields)
28+
29+
30+
class SimulatableDDRBuffer(io.DDRBuffer):
    """`io.DDRBuffer` variant that elaborates to a flip-flop model on simulation ports.

    When the wrapped port is not an `io.SimulationPort`, elaboration is delegated unchanged to
    `io.DDRBuffer`. Otherwise the buffer is modelled with ordinary flip-flops clocked on both
    the buffer's clock and its inverse.
    """

    def elaborate(self, platform):
        # Anything other than a simulation port uses the stock implementation.
        if not isinstance(self._port, io.SimulationPort):
            return super().elaborate(platform)

        # At the time of writing Amaranth DDRBuffer doesn't allow for simulation, this implements
        # ICE40 semantics for simulation.
        m = Module()

        # All pin access goes through this inner buffer; the flops below sit on top of it.
        m.submodules.io_buffer = io_buffer = io.Buffer(self.direction, self.port)

        if self.direction is not io.Direction.Output:
            # Local clock domain driven by the inverse of the input domain's clock.
            m.domains.i_domain_negedge = ClockDomain("i_domain_negedge", local=True)
            m.d.comb += ClockSignal("i_domain_negedge").eq(~ClockSignal(self.i_domain))
            i_ff = Signal(len(self.port), reset_less=True)
            i_negedge_ff = Signal(len(self.port), reset_less=True)
            i_final_ff = Signal(data.ArrayLayout(len(self.port), 2), reset_less=True)
            # Sample the pin once in the input domain and once in the inverted-clock domain...
            m.d[self.i_domain] += i_ff.eq(io_buffer.i)
            m.d["i_domain_negedge"] += i_negedge_ff.eq(io_buffer.i)
            # ...then re-register both samples together in the input domain, `i_ff` first and
            # `i_negedge_ff` second, and present that pair as `self.i`.
            m.d[self.i_domain] += i_final_ff.eq(Cat(i_ff, i_negedge_ff))
            m.d.comb += self.i.eq(i_final_ff)

        if self.direction is not io.Direction.Input:
            # Local clock domain driven by the inverse of the output domain's clock.
            m.domains.o_domain_negedge = ClockDomain("o_domain_negedge", local=True)
            m.d.comb += ClockSignal("o_domain_negedge").eq(~ClockSignal(self.o_domain))
            o_ff = Signal(len(self.port), reset_less=True)
            o_negedge_ff = Signal(len(self.port), reset_less=True)
            oe_ff = Signal(reset_less=True)
            # The pin is driven with `o_ff ^ o_negedge_ff` (see below). Loading `o_ff` with
            # `o[0] ^ o_negedge_ff` makes that XOR equal `o[0]` after an output-domain update.
            m.d[self.o_domain] += o_ff.eq(self.o[0] ^ o_negedge_ff)
            # `o[1]` is captured in the output domain alongside `o[0]`...
            o1_ff = Signal(len(self.port), reset_less=True)
            m.d[self.o_domain] += o1_ff.eq(self.o[1])
            # ...and loading `o_negedge_ff` with `o1_ff ^ o_ff` in the inverted-clock domain
            # makes the XOR on the pin equal `o1_ff` after that update.
            m.d["o_domain_negedge"] += o_negedge_ff.eq(o1_ff ^ o_ff)
            # Output enable is registered once in the output domain.
            m.d[self.o_domain] += oe_ff.eq(self.oe)
            m.d.comb += io_buffer.o.eq(o_ff ^ o_negedge_ff)
            m.d.comb += io_buffer.oe.eq(oe_ff)

        return m
67+
68+
69+
class IOStreamer(wiring.Component):
    """I/O buffer to stream adapter.

    This adapter instantiates I/O buffers for a port (FF or DDR) and connects them to a pair of
    streams, one for the outputs of the buffers and one for the inputs. Whenever an `o_stream`
    transfer occurs, the state of the output is updated _t1_ cycles later; if `o_stream.p.i_en`
    is set, then _t2_ cycles later, a payload with the data captured at the same time as
    the outputs were updated appears on `i_stream.p.i`.

    Arbitrary ancillary data may be provided with `o_stream` transfers via `o_stream.p.meta`,
    and this data will be relayed back as `i_stream.p.meta` with the output-to-input latency
    of the buffer. Higher-level protocol engines can use this data to indicate how the inputs
    must be processed without needing counters or state machines on a higher level to match
    the latency (and, usually, without needing any knowledge of the latency at all).

    On reset, output ports have their drivers enabled, and bidirectional ports have them disabled.
    All of the signals are deasserted, which could be a low or a high level depending on the port
    polarity.
    """

    @staticmethod
    def o_stream_signature(ioshape, /, *, ratio=1, meta_layout=0):
        """Return the stream signature of the output-side stream.

        The payload is a struct with three fields:

        * ``port``: one sub-struct per ioshape entry selected for direction ``"o"``, holding
          ``o`` (the entry's width when ``ratio == 1``, otherwise an array of ``ratio`` elements
          of that width) and a 1-bit ``oe``;
        * ``i_en``: 1 bit;
        * ``meta``: shaped by ``meta_layout``.
        """
        return stream.Signature(data.StructLayout({
            "port": _map_ioshape("o", ioshape, lambda width: data.StructLayout({
                "o": width if ratio == 1 else data.ArrayLayout(width, ratio),
                "oe": 1,
            })),
            "i_en": 1,
            "meta": meta_layout,
        }))

    @staticmethod
    def i_stream_signature(ioshape, /, *, ratio=1, meta_layout=0):
        """Return the stream signature of the input-side stream.

        The payload is a struct with two fields:

        * ``port``: one sub-struct per ioshape entry selected for direction ``"i"``, holding
          ``i`` (the entry's width when ``ratio == 1``, otherwise an array of ``ratio`` elements
          of that width);
        * ``meta``: shaped by ``meta_layout``.
        """
        return stream.Signature(data.StructLayout({
            "port": _map_ioshape("i", ioshape, lambda width: data.StructLayout({
                "i": width if ratio == 1 else data.ArrayLayout(width, ratio),
            })),
            "meta": meta_layout,
        }))

    def __init__(self, ioshape, ports, /, *, ratio=1, init=None, meta_layout=0):
        """Create the adapter.

        ``ioshape`` describes the ports, ``ports`` is the port object(s) to buffer, ``ratio`` is
        1 (FF buffers) or 2 (DDR buffers), ``init`` is passed as the initial value of the output
        latch, and ``meta_layout`` shapes the ``meta`` field of both streams.
        """
        # NOTE(review): an `int` ioshape passes this assert, but the signature helpers below go
        # through `_map_ioshape`, which calls `ioshape.items()` -- confirm whether `int` shapes
        # are still meant to be accepted.
        assert isinstance(ioshape, (int, dict))
        assert ratio in (1, 2)

        self._ioshape = ioshape
        self._ports = ports
        self._ratio = ratio
        self._init = init

        super().__init__({
            "o_stream": In(self.o_stream_signature(ioshape, ratio=ratio, meta_layout=meta_layout)),
            "i_stream": Out(self.i_stream_signature(ioshape, ratio=ratio, meta_layout=meta_layout)),
        })

    def elaborate(self, platform):
        m = Module()

        # Pick the buffer type for the ratio. `latency` is the number of `sync` stages that
        # `i_en` and `meta` are delayed by below, and it also sizes the skid buffer.
        if self._ratio == 1:
            buffer_cls, latency = io.FFBuffer, 1
        if self._ratio == 2:
            # FIXME: should this be 2 or 3? the latency differs between i[0] and i[1]
            buffer_cls, latency = SimulatableDDRBuffer, 3

        # NOTE(review): if `self._ports` is neither an `io.PortLike` nor a `PortGroup`, `buffer`
        # is never bound and the first use below fails. Also, in the `io.PortLike` case `buffer`
        # is a single buffer object, while the loops below index it by ioshape entry name --
        # confirm that branch is still expected to work.
        if isinstance(self._ports, io.PortLike):
            m.submodules.buffer = buffer = buffer_cls("io", self._ports)
        if isinstance(self._ports, PortGroup):
            # One buffer per attribute of the group, with the direction taken from the
            # ioshape entry of the same name.
            buffer = {}
            for name, sub_port in self._ports.__dict__.items():
                direction, _width = self._ioshape[name]
                m.submodules[f"buffer_{name}"] = buffer[name] = buffer_cls(direction, sub_port)

        # Remembers the most recently transferred output state, so that the buffers keep being
        # driven with it on cycles without an `o_stream` transfer.
        o_latch = Signal(_map_ioshape("o", self._ioshape, lambda width: data.StructLayout({
            "o": width,
            "oe": 1,
        })), init=self._init)
        with m.If(self.o_stream.valid & self.o_stream.ready):
            # Transfer this cycle: drive the buffers straight from the stream payload...
            for buffer_parts, stream_parts in _iter_ioshape("o", self._ioshape,
                    buffer, self.o_stream.p.port):
                m.d.comb += buffer_parts.o.eq(stream_parts.o)
                m.d.comb += buffer_parts.oe.eq(stream_parts.oe)
            # ...and update the latch. With ratio 2 only the last element of `o` is kept.
            for latch_parts, stream_parts in _iter_ioshape("o", self._ioshape,
                    o_latch, self.o_stream.p.port):
                if self._ratio == 1:
                    m.d.sync += latch_parts.o.eq(stream_parts.o)
                else:
                    m.d.sync += latch_parts.o.eq(stream_parts.o[-1])
                m.d.sync += latch_parts.oe.eq(stream_parts.oe)
        with m.Else():
            # No transfer this cycle: hold the latched state, repeated `ratio` times for DDR.
            for buffer_parts, latch_parts in _iter_ioshape("o", self._ioshape,
                    buffer, o_latch):
                if self._ratio == 1:
                    m.d.comb += buffer_parts.o.eq(latch_parts.o)
                else:
                    m.d.comb += buffer_parts.o.eq(latch_parts.o.replicate(self._ratio))
                m.d.comb += buffer_parts.oe.eq(latch_parts.oe)

        def delay(value, name):
            """Return `value` passed through `latency` registers in the `sync` domain."""
            for stage in range(latency):
                next_value = Signal.like(value, name=f"{name}_{stage}")
                m.d.sync += next_value.eq(value)
                value = next_value
            return value

        # Delay the "sample requested" flag and the metadata by `latency` cycles.
        i_en = delay(self.o_stream.valid & self.o_stream.ready &
                     self.o_stream.p.i_en, name="i_en")
        meta = delay(self.o_stream.p.meta, name="meta")

        # This skid buffer is organized as a shift register to avoid any uncertainties associated
        # with the use of an async read memory. On platforms that have LUTRAM, this implementation
        # may be slightly worse than using LUTRAM, and may have to be revisited in the future.
        skid = Array(Signal(self.i_stream.payload.shape(), name=f"skid_{stage}")
                     for stage in range(1 + latency))
        # Entry 0 is not a register: it follows the buffer inputs and the delayed metadata.
        for skid_parts, buffer_parts in _iter_ioshape("i", self._ioshape, skid[0].port, buffer):
            m.d.comb += skid_parts.i.eq(buffer_parts.i)
        m.d.comb += skid[0].meta.eq(meta)

        # Index of the skid entry currently presented on `i_stream`.
        skid_at = Signal(range(1 + latency))
        with m.If(i_en & ~self.i_stream.ready):
            # A sample is due but the consumer is not ready: shift every entry up by one and
            # advance the read index.
            # m.d.sync += Assert(skid_at != latency)
            m.d.sync += skid_at.eq(skid_at + 1)
            for n_shift in range(latency):
                m.d.sync += skid[n_shift + 1].eq(skid[n_shift])
        with m.Elif((skid_at != 0) & self.i_stream.ready):
            # The consumer took a buffered entry: step the read index back.
            m.d.sync += skid_at.eq(skid_at - 1)

        m.d.comb += self.i_stream.payload.eq(skid[skid_at])
        m.d.comb += self.i_stream.valid.eq(i_en | (skid_at != 0))
        # New output transfers are accepted only while the consumer is ready and the read index
        # is at entry 0.
        m.d.comb += self.o_stream.ready.eq(self.i_stream.ready & (skid_at == 0))

        return m
199+
200+
201+
class IOClocker(wiring.Component):
    """Stream stage that can replace one output entry of the payload with a generated clock.

    Payloads arrive on ``i_stream`` and leave on ``o_stream``, whose signature is the one
    produced by ``IOStreamer.o_stream_signature``. When a payload's ``bypass`` bit is clear,
    the output entry named by ``clock`` is overridden with a generated phase signal and its
    output enable is forced on; when ``bypass`` is set the payload is forwarded as-is.
    The ``divisor`` input sets the clock rate (see the formula in ``__init__``).
    """

    @staticmethod
    def i_stream_signature(ioshape, /, *, _ratio=1, meta_layout=0):
        """Return the signature of the stream feeding the clocker.

        The payload holds a 1-bit ``bypass`` field followed by ``port``, ``i_en`` and ``meta``
        fields that are built the same way as in this class's ``o_stream_signature``.
        """
        # Currently the only supported ratio is 1, but this will change in the future for
        # interfaces like HyperBus.
        return stream.Signature(data.StructLayout({
            "bypass": 1,
            "port": _map_ioshape("o", ioshape, lambda width: data.StructLayout({
                "o": width if _ratio == 1 else data.ArrayLayout(width, _ratio),
                "oe": 1,
            })),
            "i_en": 1,
            "meta": meta_layout,
        }))

    @staticmethod
    def o_stream_signature(ioshape, /, *, ratio=1, meta_layout=0):
        """Return the signature of the stream leaving the clocker (same as `IOStreamer`'s)."""
        return IOStreamer.o_stream_signature(ioshape, ratio=ratio, meta_layout=meta_layout)

    def __init__(self, ioshape, *, clock, o_ratio=1, meta_layout=0, divisor_width=16):
        """Create the clocker.

        ``ioshape`` must be a dict and ``clock`` must be the name of one of its entries;
        ``o_ratio`` is 1 or 2 and selects the ratio of ``o_stream``; ``meta_layout`` shapes the
        ``meta`` field of both streams; ``divisor_width`` is the width of the ``divisor`` input.
        """
        assert isinstance(ioshape, dict)
        assert isinstance(clock, str)
        assert o_ratio in (1, 2)
        assert clock in ioshape

        self._clock = clock
        self._ioshape = ioshape
        self._o_ratio = o_ratio

        super().__init__({
            # The input stream is always built with the default ratio of 1.
            "i_stream": In(self.i_stream_signature(ioshape,
                meta_layout=meta_layout)),
            "o_stream": Out(self.o_stream_signature(ioshape,
                ratio=o_ratio, meta_layout=meta_layout)),

            # f_clk = f_sync if (o_ratio == 2 and divisor == 0) else f_sync / (2 * max(1, divisor))
            "divisor": In(divisor_width),
        })

    def elaborate(self, platform):
        m = Module()

        # Forward the inputs to the outputs as-is. This includes the clock; it is overridden below
        # if the clocker is used (not bypassed).
        # NOTE(review): direction "io" makes `_iter_ioshape` visit every ioshape entry, whereas
        # both port layouts are built with direction "o" -- confirm that an input-only entry in
        # `ioshape` is either impossible here or handled.
        for i_parts, o_parts in _iter_ioshape("io", self._ioshape,
                self.i_stream.p.port, self.o_stream.p.port):
            m.d.comb += o_parts.o .eq(i_parts.o.replicate(self._o_ratio))
            m.d.comb += o_parts.oe.eq(i_parts.oe)
        m.d.comb += self.o_stream.p.i_en.eq(self.i_stream.p.i_en)
        m.d.comb += self.o_stream.p.meta.eq(self.i_stream.p.meta)

        # Value of the generated clock for the payload being output; set by the FSM below.
        phase = Signal()
        # If the clocker is used...
        with m.If(~self.i_stream.p.bypass):
            # ... ignore the clock in the inputs and replace it with the generated one...
            if self._o_ratio == 1:
                m.d.comb += self.o_stream.p.port[self._clock].o.eq(phase)
            if self._o_ratio == 2:
                m.d.comb += self.o_stream.p.port[self._clock].o.eq(Cat(~phase, phase))
            m.d.comb += self.o_stream.p.port[self._clock].oe.eq(1)
            # ... while requesting input sampling only for the rising edge. (Interfaces triggering
            # transfers on falling edge will be inverting the clock at the `IOPort` level.)
            m.d.comb += self.o_stream.p.i_en.eq(self.i_stream.p.i_en & phase)

        # Down-counter reloaded from `divisor` whenever the FSM emits an edge. The FSM only
        # runs while it reads 0 or 1; otherwise it just counts down.
        timer = Signal.like(self.divisor)
        with m.If((timer == 0) | (timer == 1)):
            # Only produce output when the timer has expired. This ensures that no clock pulse
            # exceeds the frequency set by `divisor`, except the ones that bypass the clocker.
            m.d.comb += self.o_stream.valid.eq(self.i_stream.valid)

            with m.FSM():
                with m.State("Falling"):
                    with m.If(self.i_stream.p.bypass): # Bypass the clocker entirely.
                        m.d.comb += self.i_stream.ready.eq(self.o_stream.ready)

                    with m.Else(): # Produce a falling edge at the output.
                        # Whenever DDR output is used, `phase == 1` outputs a low state first and
                        # a high state second. When `phase == 1` payloads are output back to back
                        # (in DDR mode only!) this generates a pulse train with data changes
                        # coinciding with the falling edges. Setting `divisor == 0` in this mode
                        # allows clocking the peripheral at the `sync` frequency.
                        with m.If((self._o_ratio == 2) & (self.divisor == 0)):
                            m.d.comb += phase.eq(1)
                            with m.If(self.o_stream.ready):
                                m.d.comb += self.i_stream.ready.eq(1)
                        with m.Else():
                            # Emit the low phase without consuming the input payload
                            # (`i_stream.ready` stays deasserted); the same payload is
                            # consumed in the "Rising" state.
                            m.d.comb += phase.eq(0)
                            with m.If(self.o_stream.ready & self.i_stream.valid):
                                m.d.sync += timer.eq(self.divisor)
                                m.next = "Rising"

                with m.State("Rising"):
                    # Emit the high phase and consume the input payload once it is accepted.
                    m.d.comb += phase.eq(1)
                    with m.If(self.o_stream.ready):
                        m.d.comb += self.i_stream.ready.eq(1)
                        m.d.sync += timer.eq(self.divisor)
                        m.next = "Falling"

        with m.Else():
            m.d.sync += timer.eq(timer - 1)

        return m

0 commit comments

Comments
 (0)