11import debug from 'debug'
22import { CID } from 'multiformats/cid'
3+ import * as dagPB from '@ipld/dag-pb'
34import * as Block from 'multiformats/block'
45import { exporter , walkPath } from 'ipfs-unixfs-exporter'
56import { transform } from 'streaming-iterables'
@@ -29,20 +30,23 @@ export class Dagula {
2930 }
3031
3132 /**
32- * @param {import('multiformats').CID|string } cid
33- * @param {{ signal?: AbortSignal } } [options]
33+ * @param {CID[]|CID|string } cid
34+ * @param {object } [options]
35+ * @param {AbortSignal } [options.signal]
36+ * @param {(block: import('multiformats').BlockView) => CID[] } [options.search]
3437 */
3538 async * get ( cid , options = { } ) {
3639 cid = typeof cid === 'string' ? CID . parse ( cid ) : cid
3740 log ( 'getting DAG %s' , cid )
41+ let cids = Array . isArray ( cid ) ? cid : [ cid ]
42+ const search = options . search || breadthFirstSearch ( )
3843
3944 /** @type {AbortController[] } */
4045 let aborters = [ ]
4146 const { signal } = options
4247 signal ?. addEventListener ( 'abort' , ( ) => aborters . forEach ( a => a . abort ( signal . reason ) ) )
4348
44- let cids = [ cid ]
45- while ( true ) {
49+ while ( cids . length > 0 ) {
4650 log ( 'fetching %d CIDs' , cids . length )
4751 const fetchBlocks = transform ( cids . length , async cid => {
4852 if ( signal ) {
@@ -54,7 +58,7 @@ export class Dagula {
5458 }
5559 return this . getBlock ( cid )
5660 } )
57- const nextCids = [ ]
61+ let nextCids = [ ]
5862 for await ( const { cid, bytes } of fetchBlocks ( cids ) ) {
5963 const decoder = this . #decoders[ cid . code ]
6064 if ( ! decoder ) {
@@ -72,16 +76,87 @@ export class Dagula {
7276 // createUnsafe here.
7377 const block = await Block . create ( { bytes, cid, codec : decoder , hasher } )
7478 yield block
75- for ( const [ , cid ] of block . links ( ) ) {
76- nextCids . push ( cid )
77- }
79+ nextCids = nextCids . concat ( search ( block ) )
7880 }
79- if ( ! nextCids . length ) break
8081 log ( '%d CIDs in links' , nextCids . length )
8182 cids = nextCids
8283 }
8384 }
8485
86+ /**
87+ * Yield all blocks traversed to resolve the ipfs path.
88+ * Then use carScope to determine the set of blocks of the targeted dag to yield.
89+ * Yield all blocks by default.
90+ * Use carScope: 'block' to yield the termimal block.
91+ * Use carScope: 'file' to yield all the blocks of a unixfs file, or enough blocks to list a directory.
92+ *
93+ * @param {string } cidPath
94+ * @param {object } [options]
95+ * @param {AbortSignal } [options.signal]
96+ * @param {'all'|'file'|'block' } [options.carScope] control how many layers of the dag are returned
97+ * 'all': return the entire dag starting at path. (default)
98+ * 'block': return the block identified by the path.
99+ * 'file': Mimic gateway semantics: Return All blocks for a multi-block file or just enough blocks to enumerate a dir/map but not the dir contents.
100+ * Where path points to a single block file, all three selectors would return the same thing.
101+ * where path points to a sharded hamt: 'file' returns the blocks of the hamt so the dir can be listed. 'block' returns the root block of the hamt.
102+ */
103+ async * getPath ( cidPath , options = { } ) {
104+ const carScope = options . carScope ?? 'all'
105+
106+ /**
107+ * The resolved dag root at the terminus of the cidPath
108+ * @type {import('ipfs-unixfs-exporter').UnixFSEntry }
109+ */
110+ let base
111+
112+ /**
113+ * Cache of blocks required to resove the cidPath
114+ * @type {import('./index').Block[] }
115+ */
116+ let traversed = [ ]
117+
118+ /**
119+ * Adapter for unixfs-exporter to track the blocks it loads as it resolves the path.
120+ * `walkPath` emits a single unixfs entry for multiblock structures, but we need the individual blocks.
121+ * TODO: port logic to @web3-storage/ipfs-path to make this less ugly.
122+ */
123+ const blockstore = {
124+ /**
125+ * @param {CID } cid
126+ * @param {{ signal?: AbortSignal } } [options]
127+ */
128+ get : async ( cid , options ) => {
129+ const block = await this . getBlock ( cid , options )
130+ traversed . push ( block )
131+ return block . bytes
132+ }
133+ }
134+ for await ( const item of walkPath ( cidPath , blockstore , { signal : options . signal } ) ) {
135+ base = item
136+ yield * traversed
137+ traversed = [ ]
138+ }
139+
140+ if ( carScope === 'all' || ( carScope === 'file' && base . type !== 'directory' ) ) {
141+ const links = base . node . Links ?. map ( l => l . Hash ) || [ ]
142+ // fetch the entire dag rooted at the end of the provided path
143+ if ( links . length ) {
144+ yield * this . get ( links , { signal : options . signal } )
145+ }
146+ }
147+ // non-files, like directories, and IPLD Maps only return blocks necessary for their enumeration
148+ if ( carScope === 'file' && base . type === 'directory' ) {
149+ // the single block for the root has already been yielded.
150+ // For a hamt we must fetch all the blocks of the (current) hamt.
151+ if ( base . unixfs . type === 'hamt-sharded-directory' ) {
152+ const hamtLinks = base . node . Links ?. filter ( l => l . Name . length === 2 ) . map ( l => l . Hash ) || [ ]
153+ if ( hamtLinks . length ) {
154+ yield * this . get ( hamtLinks , { search : hamtSearch , signal : options . signal } )
155+ }
156+ }
157+ }
158+ }
159+
85160 /**
86161 * @param {import('multiformats').CID|string } cid
87162 * @param {{ signal?: AbortSignal } } [options]
@@ -117,11 +192,11 @@ export class Dagula {
117192 }
118193
119194 /**
120- * @param {string|import('multiformats').CID } path
195+ * @param {string } cidPath
121196 * @param {{ signal?: AbortSignal } } [options]
122197 */
123- async * walkUnixfsPath ( path , options = { } ) {
124- log ( 'walking unixfs %s' , path )
198+ async * walkUnixfsPath ( cidPath , options = { } ) {
199+ log ( 'walking unixfs %s' , cidPath )
125200 const blockstore = {
126201 /**
127202 * @param {CID } cid
@@ -132,8 +207,38 @@ export class Dagula {
132207 return block . bytes
133208 }
134209 }
210+ yield * walkPath ( cidPath , blockstore , { signal : options . signal } )
211+ }
212+ }
135213
136- // @ts -ignore exporter requires Blockstore but only uses `get`
137- yield * walkPath ( path , blockstore , { signal : options . signal } )
/**
 * Build a search function for a DAG traversal: given a decoded block it
 * returns the CIDs that should be fetched next.
 *
 * The filter receives a `[name, cid]` pair for every link of the block and
 * decides whether that link is followed. For dag-pb blocks `name` is the
 * link's `Name` field; for every other codec it is whatever `block.links()`
 * reports as the first element of each entry.
 *
 * @param {(link: [string, CID]) => boolean} linkFilter keep a link when this returns a truthy value (default: keep all)
 * @returns {(block: import('multiformats').BlockView) => CID[]}
 */
export function breadthFirstSearch (linkFilter = () => true) {
  /**
   * @param {import('multiformats').BlockView} block
   */
  return function (block) {
    // links() paths dagPb in the ipld style so name is e.g `Links/0/Hash`, and not what we want here.
    // For dag-pb the pairs are therefore built from the decoded node's `Links` instead.
    const isDagPB = block.cid.code === dagPB.code
    const pairs = isDagPB
      ? Array.from(block.value.Links, ({ Name, Hash }) => [Name, Hash])
      : block.links()

    const selected = []
    for (const pair of pairs) {
      if (linkFilter(pair)) {
        selected.push(pair[1])
      }
    }
    return selected
  }
}
243+
/**
 * Search function that only follows links whose name is exactly two
 * characters long.
 *
 * `Name` is optional on a dag-pb link, so a link without a name is not
 * followed instead of raising a TypeError.
 */
export const hamtSearch = breadthFirstSearch(([name]) => name?.length === 2)
0 commit comments