Skip to content

Commit a613b45

Browse files
olizilla and Alan Shaw
authored
feat: getPath with carScope (#8)
Add a `getPath` method: a generator that returns the blocks for the targeted dag and all blocks traversed while resolving a cid+path string.

Supports `carScope` to specify which blocks to return for the resolved dag:
- `'all'`: return the entire dag starting at path. (default)
- `'block'`: return the block identified by the path.
- `'file'`: mimic gateway semantics: return all blocks for a multi-block file, or just enough blocks to enumerate a dir/map but not the dir contents.

see: storacha/freeway#33
see: storacha/freeway#34
see: ipfs/specs#402

TODO:
- [x] find out how to identify the boundaries of a unixfs hamt
  ...unixfs-exporter seems to define it as "not having an empty or null Link.Name after the first 2 chars are stripped", which seems loose... what happens if the actual dir listing has 2 char long link names?
  see: https://github.com/ipfs/js-ipfs-unixfs/blob/e853049bd63d6773442e1540ae49b6a443ca8672/packages/ipfs-unixfs-exporter/src/resolvers/unixfs-v1/content/hamt-sharded-directory.ts#L20-L42

License: MIT

---------

Signed-off-by: Oli Evans <[email protected]>
Co-authored-by: Alan Shaw <[email protected]>
1 parent e927919 commit a613b45

File tree

9 files changed

+9507
-5415
lines changed

9 files changed

+9507
-5415
lines changed

bin.js

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,9 @@ cli.command('get <cid>')
6363
.describe('Fetch a DAG from the peer. Outputs a CAR file.')
6464
.option('-p, --peer', 'Address of peer to fetch data from.')
6565
.option('-t, --timeout', 'Timeout in milliseconds.', TIMEOUT)
66-
.action(async (cid, { peer, timeout }) => {
67-
cid = CID.parse(cid)
66+
.action(async (cidPath, { peer, timeout }) => {
67+
const [cidStr] = cidPath.replace(/^\/ipfs\//, '').split('/')
68+
const cid = CID.parse(cidStr)
6869
const controller = new TimeoutController(timeout)
6970
const libp2p = await getLibp2p()
7071
const dagula = await fromNetwork(libp2p, { peer, hashers })
@@ -73,7 +74,7 @@ cli.command('get <cid>')
7374
let error
7475
;(async () => {
7576
try {
76-
for await (const block of dagula.get(cid, { signal: controller.signal })) {
77+
for await (const block of dagula.getPath(cidPath, { signal: controller.signal })) {
7778
controller.reset()
7879
await writer.put(block)
7980
}

index.d.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,21 @@ export interface Network {
3030
handle: (protocol: string | string[], handler: StreamHandler) => Promise<void>
3131
}
3232

33+
export type CarScope = 'all'|'file'|'block'
34+
35+
export interface CarScopeOptions {
36+
carScope?: CarScope
37+
}
38+
3339
export interface IDagula {
3440
/**
3541
* Get a complete DAG.
3642
*/
3743
get (cid: CID|string, options?: AbortOptions): AsyncIterableIterator<Block>
44+
/**
45+
* Get a DAG for a cid+path
46+
*/
47+
getPath (cidPath: string, options?: AbortOptions & CarScopeOptions): AsyncIterableIterator<Block>
3848
/**
3949
* Get a single block.
4050
*/
@@ -55,6 +65,10 @@ export declare class Dagula implements IDagula {
5565
* Get a complete DAG.
5666
*/
5767
get (cid: CID|string, options?: AbortOptions): AsyncIterableIterator<Block>
68+
/**
69+
* Get a DAG for a cid+path
70+
*/
71+
getPath (cidPath: string, options?: AbortOptions & CarScopeOptions): AsyncIterableIterator<Block>
5872
/**
5973
* Get a single block.
6074
*/

index.js

Lines changed: 119 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import debug from 'debug'
22
import { CID } from 'multiformats/cid'
3+
import * as dagPB from '@ipld/dag-pb'
34
import * as Block from 'multiformats/block'
45
import { exporter, walkPath } from 'ipfs-unixfs-exporter'
56
import { transform } from 'streaming-iterables'
@@ -29,20 +30,23 @@ export class Dagula {
2930
}
3031

3132
/**
32-
* @param {import('multiformats').CID|string} cid
33-
* @param {{ signal?: AbortSignal }} [options]
33+
* @param {CID[]|CID|string} cid
34+
* @param {object} [options]
35+
* @param {AbortSignal} [options.signal]
36+
* @param {(block: import('multiformats').BlockView) => CID[]} [options.search]
3437
*/
3538
async * get (cid, options = {}) {
3639
cid = typeof cid === 'string' ? CID.parse(cid) : cid
3740
log('getting DAG %s', cid)
41+
let cids = Array.isArray(cid) ? cid : [cid]
42+
const search = options.search || breadthFirstSearch()
3843

3944
/** @type {AbortController[]} */
4045
let aborters = []
4146
const { signal } = options
4247
signal?.addEventListener('abort', () => aborters.forEach(a => a.abort(signal.reason)))
4348

44-
let cids = [cid]
45-
while (true) {
49+
while (cids.length > 0) {
4650
log('fetching %d CIDs', cids.length)
4751
const fetchBlocks = transform(cids.length, async cid => {
4852
if (signal) {
@@ -54,7 +58,7 @@ export class Dagula {
5458
}
5559
return this.getBlock(cid)
5660
})
57-
const nextCids = []
61+
let nextCids = []
5862
for await (const { cid, bytes } of fetchBlocks(cids)) {
5963
const decoder = this.#decoders[cid.code]
6064
if (!decoder) {
@@ -72,16 +76,87 @@ export class Dagula {
7276
// createUnsafe here.
7377
const block = await Block.create({ bytes, cid, codec: decoder, hasher })
7478
yield block
75-
for (const [, cid] of block.links()) {
76-
nextCids.push(cid)
77-
}
79+
nextCids = nextCids.concat(search(block))
7880
}
79-
if (!nextCids.length) break
8081
log('%d CIDs in links', nextCids.length)
8182
cids = nextCids
8283
}
8384
}
8485

86+
/**
87+
* Yield all blocks traversed to resolve the ipfs path.
88+
* Then use carScope to determine the set of blocks of the targeted dag to yield.
89+
* Yield all blocks by default.
90+
* Use carScope: 'block' to yield the terminal block.
91+
* Use carScope: 'file' to yield all the blocks of a unixfs file, or enough blocks to list a directory.
92+
*
93+
* @param {string} cidPath
94+
* @param {object} [options]
95+
* @param {AbortSignal} [options.signal]
96+
* @param {'all'|'file'|'block'} [options.carScope] control how many layers of the dag are returned
97+
* 'all': return the entire dag starting at path. (default)
98+
* 'block': return the block identified by the path.
99+
* 'file': mimic gateway semantics: return all blocks for a multi-block file, or just enough blocks to enumerate a dir/map but not the dir contents.
100+
* Where path points to a single block file, all three selectors would return the same thing.
101+
* where path points to a sharded hamt: 'file' returns the blocks of the hamt so the dir can be listed. 'block' returns the root block of the hamt.
102+
*/
103+
async * getPath (cidPath, options = {}) {
104+
const carScope = options.carScope ?? 'all'
105+
106+
/**
107+
* The resolved dag root at the terminus of the cidPath
108+
* @type {import('ipfs-unixfs-exporter').UnixFSEntry}
109+
*/
110+
let base
111+
112+
/**
113+
* Cache of blocks required to resolve the cidPath
114+
* @type {import('./index').Block[]}
115+
*/
116+
let traversed = []
117+
118+
/**
119+
* Adapter for unixfs-exporter to track the blocks it loads as it resolves the path.
120+
* `walkPath` emits a single unixfs entry for multiblock structures, but we need the individual blocks.
121+
* TODO: port logic to @web3-storage/ipfs-path to make this less ugly.
122+
*/
123+
const blockstore = {
124+
/**
125+
* @param {CID} cid
126+
* @param {{ signal?: AbortSignal }} [options]
127+
*/
128+
get: async (cid, options) => {
129+
const block = await this.getBlock(cid, options)
130+
traversed.push(block)
131+
return block.bytes
132+
}
133+
}
134+
for await (const item of walkPath(cidPath, blockstore, { signal: options.signal })) {
135+
base = item
136+
yield * traversed
137+
traversed = []
138+
}
139+
140+
if (carScope === 'all' || (carScope === 'file' && base.type !== 'directory')) {
141+
const links = base.node.Links?.map(l => l.Hash) || []
142+
// fetch the entire dag rooted at the end of the provided path
143+
if (links.length) {
144+
yield * this.get(links, { signal: options.signal })
145+
}
146+
}
147+
// non-files, like directories, and IPLD Maps only return blocks necessary for their enumeration
148+
if (carScope === 'file' && base.type === 'directory') {
149+
// the single block for the root has already been yielded.
150+
// For a hamt we must fetch all the blocks of the (current) hamt.
151+
if (base.unixfs.type === 'hamt-sharded-directory') {
152+
const hamtLinks = base.node.Links?.filter(l => l.Name.length === 2).map(l => l.Hash) || []
153+
if (hamtLinks.length) {
154+
yield * this.get(hamtLinks, { search: hamtSearch, signal: options.signal })
155+
}
156+
}
157+
}
158+
}
159+
85160
/**
86161
* @param {import('multiformats').CID|string} cid
87162
* @param {{ signal?: AbortSignal }} [options]
@@ -117,11 +192,11 @@ export class Dagula {
117192
}
118193

119194
/**
120-
* @param {string|import('multiformats').CID} path
195+
* @param {string} cidPath
121196
* @param {{ signal?: AbortSignal }} [options]
122197
*/
123-
async * walkUnixfsPath (path, options = {}) {
124-
log('walking unixfs %s', path)
198+
async * walkUnixfsPath (cidPath, options = {}) {
199+
log('walking unixfs %s', cidPath)
125200
const blockstore = {
126201
/**
127202
* @param {CID} cid
@@ -132,8 +207,38 @@ export class Dagula {
132207
return block.bytes
133208
}
134209
}
210+
yield * walkPath(cidPath, blockstore, { signal: options.signal })
211+
}
212+
}
135213

136-
// @ts-ignore exporter requires Blockstore but only uses `get`
137-
yield * walkPath(path, blockstore, { signal: options.signal })
214+
/**
215+
* Create a search function that given a decoded Block
216+
* will return an array of CIDs to fetch next.
217+
*
218+
* @param {([name, cid]: [string, Link]) => boolean} linkFilter
219+
*/
220+
export function breadthFirstSearch (linkFilter = () => true) {
221+
/**
222+
* @param {import('multiformats').BlockView} block
223+
*/
224+
return function (block) {
225+
const nextCids = []
226+
if (block.cid.code === dagPB.code) {
227+
for (const { Name, Hash } of block.value.Links) {
228+
if (linkFilter([Name, Hash])) {
229+
nextCids.push(Hash)
230+
}
231+
}
232+
} else {
233+
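// Generic IPLD blocks: use links() directly. (dag-pb is handled separately above because links() reports its links with IPLD-style paths, e.g. `Links/0/Hash`, rather than the link Name we want to filter on.)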
234+
for (const link of block.links()) {
235+
if (linkFilter(link)) {
236+
nextCids.push(link[1])
237+
}
238+
}
239+
}
240+
return nextCids
138241
}
139242
}
243+
244+
export const hamtSearch = breadthFirstSearch(([name]) => name.length === 2)

0 commit comments

Comments
 (0)