diff --git a/packages/ipfs-unixfs-exporter/src/index.ts b/packages/ipfs-unixfs-exporter/src/index.ts
index e8b06f59..ee6c2e9e 100644
--- a/packages/ipfs-unixfs-exporter/src/index.ts
+++ b/packages/ipfs-unixfs-exporter/src/index.ts
@@ -57,6 +57,7 @@ import type { PBNode } from '@ipld/dag-pb'
 import type { Bucket } from 'hamt-sharding'
 import type { Blockstore } from 'interface-blockstore'
 import type { UnixFS } from 'ipfs-unixfs'
+import type { AbortOptions } from 'it-pushable'
 import type { ProgressOptions, ProgressEvent } from 'progress-events'
 
 export * from './errors.js'
@@ -136,6 +137,21 @@ export interface ExporterOptions extends ProgressOptions<ExporterProgressEvents>
   blockReadConcurrency?: number
 }
 
+export interface BasicExporterOptions extends ExporterOptions {
+  /**
+   * When directory contents are listed, by default the root node of each entry
+   * is fetched to decode the UnixFS metadata and know if the entry is a file or
+   * a directory. This can result in fetching extra data which may not be
+   * desirable, depending on your application.
+   *
+   * Pass `false` here to only return the name, path and CID of each entry,
+   * without any extended metadata.
+   *
+   * @default true
+   */
+  extended: false
+}
+
 export interface Exportable<T> {
   /**
    * A disambiguator to allow TypeScript to work out the type of the entry.
@@ -218,7 +234,7 @@ export interface Exportable<T> {
    * // `entries` contains the first 5 files/directories in the directory
    * ```
    */
-  content(options?: ExporterOptions): AsyncGenerator<T, void, unknown>
+  content(options?: ExporterOptions | BasicExporterOptions): AsyncGenerator<T, void, unknown>
 }
 
 /**
@@ -316,7 +332,39 @@ export interface Resolver { (cid: CID, name: string, path: string, toResolve: st
 export type UnixfsV1FileContent = AsyncIterable<Uint8Array> | Iterable<Uint8Array>
 export type UnixfsV1DirectoryContent = AsyncIterable<UnixFSEntry> | Iterable<UnixFSEntry>
 export type UnixfsV1Content = UnixfsV1FileContent | UnixfsV1DirectoryContent
-export interface UnixfsV1Resolver { (cid: CID, node: PBNode, unixfs: UnixFS, path: string, resolve: Resolve, depth: number, blockstore: ReadableStorage): (options: ExporterOptions) => UnixfsV1Content }
+
+export interface UnixfsV1BasicContent {
+  /**
+   * The name of the entry
+   */
+  name: string
+
+  /**
+   * The path of the entry within the DAG in which it was encountered
+   */
+  path: string
+
+  /**
+   * The CID of the entry
+   */
+  cid: CID
+
+  /**
+   * Resolve the root node of the entry to parse the UnixFS metadata contained
+   * there. The metadata will contain what kind of node it is (e.g. file,
+   * directory, etc.), the file size, and more.
+   */
+  resolve(options?: AbortOptions): Promise<UnixFSEntry>
+}
+
+export interface UnixFsV1ContentResolver {
+  (options: ExporterOptions): UnixfsV1Content
+  (options: BasicExporterOptions): UnixfsV1BasicContent
+}
+
+export interface UnixfsV1Resolver {
+  (cid: CID, node: PBNode, unixfs: UnixFS, path: string, resolve: Resolve, depth: number, blockstore: ReadableStorage): (options: ExporterOptions) => UnixfsV1Content
+}
 
 export interface ShardTraversalContext {
   hamtDepth: number
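Taken together, the new types give callers a cheap directory listing: only the directory node itself is read, and each child is surfaced as a name/path/CID triple plus a lazy `resolve()`. A minimal usage sketch, not part of the diff - the `cid` and `blockstore` values are assumed to be a directory CID and a `Blockstore` implementation from the surrounding application:

```ts
import { exporter } from 'ipfs-unixfs-exporter'

// `cid` and `blockstore` are assumed to exist in the application
const dir = await exporter(cid, blockstore)

// with `extended: false` each entry is only { name, path, cid, resolve } -
// the entry's root block is not fetched and no UnixFS metadata is decoded
for await (const entry of dir.content({ extended: false })) {
  console.info(entry.name, entry.cid.toString())

  // opt back in to the full UnixFSEntry for this entry only
  const full = await entry.resolve()
  console.info(full.type, full.size)
}
```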
diff --git a/packages/ipfs-unixfs-exporter/src/resolvers/unixfs-v1/content/directory.ts b/packages/ipfs-unixfs-exporter/src/resolvers/unixfs-v1/content/directory.ts
index afab2634..614b33ca 100644
--- a/packages/ipfs-unixfs-exporter/src/resolvers/unixfs-v1/content/directory.ts
+++ b/packages/ipfs-unixfs-exporter/src/resolvers/unixfs-v1/content/directory.ts
@@ -3,10 +3,11 @@ import map from 'it-map'
 import parallel from 'it-parallel'
 import { pipe } from 'it-pipe'
 import { CustomProgressEvent } from 'progress-events'
-import type { ExporterOptions, ExportWalk, UnixfsV1DirectoryContent, UnixfsV1Resolver } from '../../../index.js'
+import { isBasicExporterOptions } from '../../../utils/is-basic-exporter-options.ts'
+import type { BasicExporterOptions, ExporterOptions, ExportWalk, UnixFSEntry, UnixfsV1BasicContent, UnixfsV1Resolver } from '../../../index.js'
 
 const directoryContent: UnixfsV1Resolver = (cid, node, unixfs, path, resolve, depth, blockstore) => {
-  async function * yieldDirectoryContent (options: ExporterOptions = {}): UnixfsV1DirectoryContent {
+  async function * yieldDirectoryContent (options: ExporterOptions | BasicExporterOptions = {}): any {
     const offset = options.offset ?? 0
     const length = options.length ?? node.Links.length
     const links = node.Links.slice(offset, length)
@@ -21,8 +22,24 @@ const directoryContent: UnixfsV1Resolver = (cid, node, unixfs, path, resolve, de
         return async () => {
           const linkName = link.Name ?? ''
           const linkPath = `${path}/${linkName}`
-          const result = await resolve(link.Hash, linkName, linkPath, [], depth + 1, blockstore, options)
-          return result.entry
+
+          const load = async (options = {}): Promise<UnixFSEntry> => {
+            const result = await resolve(link.Hash, linkName, linkPath, [], depth + 1, blockstore, options)
+            return result.entry
+          }
+
+          if (isBasicExporterOptions(options)) {
+            const basic: UnixfsV1BasicContent = {
+              cid: link.Hash,
+              name: linkName,
+              path: linkPath,
+              resolve: load
+            }
+
+            return basic
+          }
+
+          return load(options)
         }
       }),
       source => parallel(source, {
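The `load` closure above is the heart of the change: nothing is read from the blockstore until the caller invokes `resolve()`, so consumers can filter on the cheap fields first. A sketch of that pattern, reusing the hypothetical `dir` entry from the previous example (the `photo-` prefix is purely illustrative):

```ts
for await (const entry of dir.content({ extended: false })) {
  // cheap: the name and CID come straight from the parent directory's links
  if (!entry.name.startsWith('photo-')) {
    continue
  }

  // expensive: fetches and decodes this entry's root block on demand
  const full = await entry.resolve()

  if (full.type === 'directory') {
    console.info(`${entry.path} is a sub-directory`)
  }
}
```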
diff --git a/packages/ipfs-unixfs-exporter/src/resolvers/unixfs-v1/content/hamt-sharded-directory.ts b/packages/ipfs-unixfs-exporter/src/resolvers/unixfs-v1/content/hamt-sharded-directory.ts
index b08255a2..a3f56189 100644
--- a/packages/ipfs-unixfs-exporter/src/resolvers/unixfs-v1/content/hamt-sharded-directory.ts
+++ b/packages/ipfs-unixfs-exporter/src/resolvers/unixfs-v1/content/hamt-sharded-directory.ts
@@ -5,11 +5,12 @@ import parallel from 'it-parallel'
 import { pipe } from 'it-pipe'
 import { CustomProgressEvent } from 'progress-events'
 import { NotUnixFSError } from '../../../errors.js'
-import type { ExporterOptions, Resolve, UnixfsV1DirectoryContent, UnixfsV1Resolver, ReadableStorage, ExportWalk } from '../../../index.js'
+import { isBasicExporterOptions } from '../../../utils/is-basic-exporter-options.ts'
+import type { ExporterOptions, Resolve, UnixfsV1DirectoryContent, UnixfsV1Resolver, ReadableStorage, ExportWalk, BasicExporterOptions, UnixFSEntry } from '../../../index.js'
 import type { PBNode } from '@ipld/dag-pb'
 
 const hamtShardedDirectoryContent: UnixfsV1Resolver = (cid, node, unixfs, path, resolve, depth, blockstore) => {
-  function yieldHamtDirectoryContent (options: ExporterOptions = {}): UnixfsV1DirectoryContent {
+  function yieldHamtDirectoryContent (options: ExporterOptions | BasicExporterOptions = {}): UnixfsV1DirectoryContent {
     options.onProgress?.(new CustomProgressEvent<CID>('unixfs:exporter:walk:hamt-sharded-directory', {
       cid
     }))
@@ -20,7 +21,7 @@ const hamtShardedDirectoryContent: UnixfsV1Resolver = (cid, node, unixfs, path,
   return yieldHamtDirectoryContent
 }
 
-async function * listDirectory (node: PBNode, path: string, resolve: Resolve, depth: number, blockstore: ReadableStorage, options: ExporterOptions): UnixfsV1DirectoryContent {
+async function * listDirectory (node: PBNode, path: string, resolve: Resolve, depth: number, blockstore: ReadableStorage, options: ExporterOptions | BasicExporterOptions): any {
   const links = node.Links
 
   if (node.Data == null) {
@@ -47,9 +48,28 @@ async function * listDirectory (node: PBNode, path: string, resolve: Resolve, de
       const name = link.Name != null ? link.Name.substring(padLength) : null
 
       if (name != null && name !== '') {
-        const result = await resolve(link.Hash, name, `${path}/${name}`, [], depth + 1, blockstore, options)
+        const linkPath = `${path}/${name}`
+        const load = async (options = {}): Promise<UnixFSEntry> => {
+          const result = await resolve(link.Hash, name, linkPath, [], depth + 1, blockstore, options)
+          return result.entry
+        }
 
-        return { entries: result.entry == null ? [] : [result.entry] }
+        if (isBasicExporterOptions(options)) {
+          return {
+            entries: [{
+              cid: link.Hash,
+              name,
+              path: linkPath,
+              resolve: load
+            }]
+          }
+        }
+
+        return {
+          entries: [
+            await load()
+          ].filter(Boolean)
+        }
       } else {
         // descend into subshard
         const block = await blockstore.get(link.Hash, options)
@@ -59,7 +79,9 @@ async function * listDirectory (node: PBNode, path: string, resolve: Resolve, de
           cid: link.Hash
         }))
 
-        return { entries: listDirectory(node, path, resolve, depth, blockstore, options) }
+        return {
+          entries: listDirectory(node, path, resolve, depth, blockstore, options)
+        }
       }
     }
   }),
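Both resolvers funnel the `extended` check through the small type guard added next. The guard only fires on a literal `extended: false`, so omitting the option or passing `extended: true` keeps the existing behaviour, which is what the `@default true` in the docs promises. A standalone restatement of that contract (not an import from the package):

```ts
// mirrors the new util: only an explicit `extended: false` opts out
const isBasic = (obj?: any): boolean => obj?.extended === false

console.info(isBasic({ extended: false })) // true - basic entries requested
console.info(isBasic({ extended: true }))  // false - full entries
console.info(isBasic({}))                  // false - extended defaults to true
console.info(isBasic(undefined))           // false - no options given
```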
diff --git a/packages/ipfs-unixfs-exporter/src/utils/is-basic-exporter-options.ts b/packages/ipfs-unixfs-exporter/src/utils/is-basic-exporter-options.ts
new file mode 100644
index 00000000..95190ea5
--- /dev/null
+++ b/packages/ipfs-unixfs-exporter/src/utils/is-basic-exporter-options.ts
@@ -0,0 +1,5 @@
+import type { BasicExporterOptions } from '../index.js'
+
+export function isBasicExporterOptions (obj?: any): obj is BasicExporterOptions {
+  return obj?.extended === false
+}
diff --git a/packages/ipfs-unixfs-exporter/test/exporter-sharded.spec.ts b/packages/ipfs-unixfs-exporter/test/exporter-sharded.spec.ts
index dd2354c5..fe16201e 100644
--- a/packages/ipfs-unixfs-exporter/test/exporter-sharded.spec.ts
+++ b/packages/ipfs-unixfs-exporter/test/exporter-sharded.spec.ts
@@ -363,4 +363,56 @@ describe('exporter sharded', function () {
       content: file?.node
     }]).to.deep.equal(files)
   })
+
+  it('exports basic sharded directory', async () => {
+    const files: Record<string, { content: Uint8Array }> = {}
+
+    // needs to result in a block that is larger than SHARD_SPLIT_THRESHOLD bytes
+    for (let i = 0; i < 100; i++) {
+      files[`file-${Math.random()}.txt`] = {
+        content: uint8ArrayConcat(await all(randomBytes(100)))
+      }
+    }
+
+    const imported = await all(importer(Object.keys(files).map(path => ({
+      path,
+      content: asAsyncIterable(files[path].content)
+    })), block, {
+      wrapWithDirectory: true,
+      shardSplitThresholdBytes: SHARD_SPLIT_THRESHOLD,
+      rawLeaves: false
+    }))
+
+    const dirCid = imported.pop()?.cid
+
+    if (dirCid == null) {
+      throw new Error('No directory CID found')
+    }
+
+    const exported = await exporter(dirCid, block)
+    const dirFiles = await all(exported.content())
+
+    // delete shard contents
+    for (const entry of dirFiles) {
+      await block.delete(entry.cid)
+    }
+
+    // list the contents again, this time just the basic version
+    const basicDirFiles = await all(exported.content({
+      extended: false
+    }))
+    expect(basicDirFiles.length).to.equal(dirFiles.length)
+
+    for (let i = 0; i < basicDirFiles.length; i++) {
+      const dirFile = basicDirFiles[i]
+
+      expect(dirFile).to.have.property('name')
+      expect(dirFile).to.have.property('path')
+      expect(dirFile).to.have.property('cid')
+      expect(dirFile).to.have.property('resolve')
+
+      // should fail because we have deleted this block
+      await expect(dirFile.resolve()).to.eventually.be.rejected()
+    }
+  })
 })
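The test above deletes every child block after the initial listing: the later `extended: false` listing still succeeds because only the directory structure is read, while each deferred `resolve()` is expected to reject. The same deferral means a lazy metadata fetch can be bounded or cancelled via the `AbortOptions` that `resolve()` accepts, since the options are forwarded to the underlying block reads. A sketch, again assuming the hypothetical `dir` from earlier (the one-second budget is arbitrary):

```ts
for await (const entry of dir.content({ extended: false })) {
  try {
    // give each deferred metadata fetch at most a second
    const full = await entry.resolve({ signal: AbortSignal.timeout(1000) })
    console.info(entry.name, full.size)
  } catch (err) {
    console.error(`could not resolve ${entry.path}`, err)
  }
}
```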
diff --git a/packages/ipfs-unixfs-exporter/test/exporter.spec.ts b/packages/ipfs-unixfs-exporter/test/exporter.spec.ts
index 67326ec4..9ce3056e 100644
--- a/packages/ipfs-unixfs-exporter/test/exporter.spec.ts
+++ b/packages/ipfs-unixfs-exporter/test/exporter.spec.ts
@@ -1605,4 +1605,54 @@ describe('exporter', () => {
 
     expect(actualInvocations).to.deep.equal(expectedInvocations)
   })
+
+  it('exports basic directory', async () => {
+    const files: Record<string, { content: Uint8Array }> = {}
+
+    for (let i = 0; i < 10; i++) {
+      files[`file-${Math.random()}.txt`] = {
+        content: uint8ArrayConcat(await all(randomBytes(100)))
+      }
+    }
+
+    const imported = await all(importer(Object.keys(files).map(path => ({
+      path,
+      content: asAsyncIterable(files[path].content)
+    })), block, {
+      wrapWithDirectory: true,
+      rawLeaves: false
+    }))
+
+    const dirCid = imported.pop()?.cid
+
+    if (dirCid == null) {
+      throw new Error('No directory CID found')
+    }
+
+    const exported = await exporter(dirCid, block)
+    const dirFiles = await all(exported.content())
+
+    // delete directory contents
+    for (const entry of dirFiles) {
+      await block.delete(entry.cid)
+    }
+
+    // list the contents again, this time just the basic version
+    const basicDirFiles = await all(exported.content({
+      extended: false
+    }))
+    expect(basicDirFiles.length).to.equal(dirFiles.length)
+
+    for (let i = 0; i < basicDirFiles.length; i++) {
+      const dirFile = basicDirFiles[i]
+
+      expect(dirFile).to.have.property('name')
+      expect(dirFile).to.have.property('path')
+      expect(dirFile).to.have.property('cid')
+      expect(dirFile).to.have.property('resolve')
+
+      // should fail because we have deleted this block
+      await expect(dirFile.resolve()).to.eventually.be.rejected()
+    }
+  })
 })