Skip to content

feat: add 'extended' option to exporter #437

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jul 30, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 50 additions & 2 deletions packages/ipfs-unixfs-exporter/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ import type { PBNode } from '@ipld/dag-pb'
import type { Bucket } from 'hamt-sharding'
import type { Blockstore } from 'interface-blockstore'
import type { UnixFS } from 'ipfs-unixfs'
import type { AbortOptions } from 'it-pushable'
import type { ProgressOptions, ProgressEvent } from 'progress-events'

export * from './errors.js'
Expand Down Expand Up @@ -136,6 +137,21 @@ export interface ExporterOptions extends ProgressOptions<ExporterProgressEvents>
blockReadConcurrency?: number
}

export interface BasicExporterOptions extends ExporterOptions {
/**
* When directory contents are listed, by default the root node of each entry
* is fetched to decode the UnixFS metadata and know if the entry is a file or
* a directory. This can result in fetching extra data which may not be
* desirable, depending on your application.
*
* Pass false here to only return the CID and the name of the entry and not
* any extended metadata.
Comment on lines +147 to +148
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea i think this will solve ipfs/helia-verified-fetch#260

*
* @default true
*/
extended: false
}

export interface Exportable<T> {
/**
* A disambiguator to allow TypeScript to work out the type of the entry.
Expand Down Expand Up @@ -218,7 +234,7 @@ export interface Exportable<T> {
* // `entries` contains the first 5 files/directories in the directory
* ```
*/
content(options?: ExporterOptions): AsyncGenerator<T, void, unknown>
content(options?: ExporterOptions | BasicExporterOptions): AsyncGenerator<T, void, unknown>
}

/**
Expand Down Expand Up @@ -316,7 +332,39 @@ export interface Resolver { (cid: CID, name: string, path: string, toResolve: st
export type UnixfsV1FileContent = AsyncIterable<Uint8Array> | Iterable<Uint8Array>
export type UnixfsV1DirectoryContent = AsyncIterable<UnixFSEntry> | Iterable<UnixFSEntry>
export type UnixfsV1Content = UnixfsV1FileContent | UnixfsV1DirectoryContent
export interface UnixfsV1Resolver { (cid: CID, node: PBNode, unixfs: UnixFS, path: string, resolve: Resolve, depth: number, blockstore: ReadableStorage): (options: ExporterOptions) => UnixfsV1Content }

/**
 * A lightweight directory-listing entry, produced when the exporter is
 * invoked with `extended: false`. Only identifying fields are populated -
 * no UnixFS metadata is fetched up front; it can be loaded on demand via
 * the `resolve` method.
 */
export interface UnixfsV1BasicContent {
/**
 * The name of the entry
 */
name: string

/**
 * The path of the entry within the DAG in which it was encountered
 */
path: string

/**
 * The CID of the entry
 */
cid: CID

/**
 * Resolve the root node of the entry to parse the UnixFS metadata contained
 * there. The metadata will contain what kind of node it is (e.g. file,
 * directory, etc), the file size, and more.
 *
 * This performs the block fetch that `extended: false` skipped, so it can
 * reject if the block is unavailable in the blockstore.
 */
resolve(options?: AbortOptions): Promise<UnixFSEntry>
}

/**
 * Overloaded content resolver: with regular `ExporterOptions` it yields
 * full UnixFS content; with `BasicExporterOptions` (`extended: false`) it
 * yields lightweight `UnixfsV1BasicContent` entries instead.
 */
export interface UnixFsV1ContentResolver {
(options: ExporterOptions): UnixfsV1Content
(options: BasicExporterOptions): UnixfsV1BasicContent
}

/**
 * Factory for a UnixFS v1 content generator: given a node and its context
 * in the DAG, returns a function that yields the node's content.
 *
 * NOTE(review): the returned function is typed to accept only
 * `ExporterOptions` and return `UnixfsV1Content` - it does not use the
 * `UnixFsV1ContentResolver` overloads declared nearby, even though the
 * directory implementations branch on `BasicExporterOptions` at runtime.
 * Confirm whether this signature should be `UnixFsV1ContentResolver`.
 */
export interface UnixfsV1Resolver {
(cid: CID, node: PBNode, unixfs: UnixFS, path: string, resolve: Resolve, depth: number, blockstore: ReadableStorage): (options: ExporterOptions) => UnixfsV1Content
}

export interface ShardTraversalContext {
hamtDepth: number
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@ import map from 'it-map'
import parallel from 'it-parallel'
import { pipe } from 'it-pipe'
import { CustomProgressEvent } from 'progress-events'
import type { ExporterOptions, ExportWalk, UnixfsV1DirectoryContent, UnixfsV1Resolver } from '../../../index.js'
import { isBasicExporterOptions } from '../../../utils/is-basic-exporter-options.ts'
import type { BasicExporterOptions, ExporterOptions, ExportWalk, UnixfsV1BasicContent, UnixfsV1Resolver } from '../../../index.js'

const directoryContent: UnixfsV1Resolver = (cid, node, unixfs, path, resolve, depth, blockstore) => {
async function * yieldDirectoryContent (options: ExporterOptions = {}): UnixfsV1DirectoryContent {
async function * yieldDirectoryContent (options: ExporterOptions | BasicExporterOptions = {}): any {
const offset = options.offset ?? 0
const length = options.length ?? node.Links.length
const links = node.Links.slice(offset, length)
Expand All @@ -21,6 +22,21 @@ const directoryContent: UnixfsV1Resolver = (cid, node, unixfs, path, resolve, de
return async () => {
const linkName = link.Name ?? ''
const linkPath = `${path}/${linkName}`

if (isBasicExporterOptions(options)) {
const basic: UnixfsV1BasicContent = {
cid: link.Hash,
name: linkName,
path: linkPath,
resolve: async (options = {}) => {
const result = await resolve(link.Hash, linkName, linkPath, [], depth + 1, blockstore, options)
return result.entry
}
}

return basic
}

const result = await resolve(link.Hash, linkName, linkPath, [], depth + 1, blockstore, options)
return result.entry
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@ import parallel from 'it-parallel'
import { pipe } from 'it-pipe'
import { CustomProgressEvent } from 'progress-events'
import { NotUnixFSError } from '../../../errors.js'
import type { ExporterOptions, Resolve, UnixfsV1DirectoryContent, UnixfsV1Resolver, ReadableStorage, ExportWalk } from '../../../index.js'
import { isBasicExporterOptions } from '../../../utils/is-basic-exporter-options.ts'
import type { ExporterOptions, Resolve, UnixfsV1DirectoryContent, UnixfsV1Resolver, ReadableStorage, ExportWalk, BasicExporterOptions, UnixfsV1BasicContent } from '../../../index.js'
import type { PBNode } from '@ipld/dag-pb'

const hamtShardedDirectoryContent: UnixfsV1Resolver = (cid, node, unixfs, path, resolve, depth, blockstore) => {
function yieldHamtDirectoryContent (options: ExporterOptions = {}): UnixfsV1DirectoryContent {
function yieldHamtDirectoryContent (options: ExporterOptions | BasicExporterOptions = {}): UnixfsV1DirectoryContent {
options.onProgress?.(new CustomProgressEvent<ExportWalk>('unixfs:exporter:walk:hamt-sharded-directory', {
cid
}))
Expand All @@ -20,7 +21,7 @@ const hamtShardedDirectoryContent: UnixfsV1Resolver = (cid, node, unixfs, path,
return yieldHamtDirectoryContent
}

async function * listDirectory (node: PBNode, path: string, resolve: Resolve, depth: number, blockstore: ReadableStorage, options: ExporterOptions): UnixfsV1DirectoryContent {
async function * listDirectory (node: PBNode, path: string, resolve: Resolve, depth: number, blockstore: ReadableStorage, options: ExporterOptions | BasicExporterOptions): any {
const links = node.Links

if (node.Data == null) {
Expand All @@ -47,7 +48,23 @@ async function * listDirectory (node: PBNode, path: string, resolve: Resolve, de
const name = link.Name != null ? link.Name.substring(padLength) : null

if (name != null && name !== '') {
const result = await resolve(link.Hash, name, `${path}/${name}`, [], depth + 1, blockstore, options)
const linkPath = `${path}/${name}`

if (isBasicExporterOptions(options)) {
const basic: UnixfsV1BasicContent = {
cid: link.Hash,
name,
path: linkPath,
resolve: async (options = {}) => {
const result = await resolve(link.Hash, name, linkPath, [], depth + 1, blockstore, options)
return result.entry
}
}

return { entries: [basic] }
}

const result = await resolve(link.Hash, name, linkPath, [], depth + 1, blockstore, options)

return { entries: result.entry == null ? [] : [result.entry] }
} else {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import type { BasicExporterOptions } from '../index.js'

/**
 * Type guard that detects "basic" exporter mode.
 *
 * Returns true only when the passed options object explicitly sets
 * `extended: false`; any other value - including omitting the option
 * entirely - selects the default extended behaviour.
 *
 * @param obj - a candidate options object (may be undefined)
 * @returns true if `obj` narrows to `BasicExporterOptions`
 */
export function isBasicExporterOptions (obj?: unknown): obj is BasicExporterOptions {
  // use `unknown` + an explicit object check rather than `any` so the
  // guard stays type-safe under strict mode; behaviour is unchanged
  return typeof obj === 'object' && obj !== null && (obj as { extended?: unknown }).extended === false
}
52 changes: 52 additions & 0 deletions packages/ipfs-unixfs-exporter/test/exporter-sharded.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -363,4 +363,56 @@ describe('exporter sharded', function () {
content: file?.node
}]).to.deep.equal(files)
})

// Verifies that listing a HAMT-sharded directory with `extended: false`
// returns lightweight entries (cid/name/path/resolve) without fetching the
// child blocks - proven by deleting those blocks first and still listing.
it('exports basic sharded directory', async () => {
const files: Record<string, { content: Uint8Array, cid?: CID }> = {}

// needs to result in a block that is larger than SHARD_SPLIT_THRESHOLD bytes
for (let i = 0; i < 100; i++) {
files[`file-${Math.random()}.txt`] = {
content: uint8ArrayConcat(await all(randomBytes(100)))
}
}

// import wrapped in a directory so the root becomes a sharded directory
const imported = await all(importer(Object.keys(files).map(path => ({
path,
content: asAsyncIterable(files[path].content)
})), block, {
wrapWithDirectory: true,
shardSplitThresholdBytes: SHARD_SPLIT_THRESHOLD,
rawLeaves: false
}))

// the wrapping directory is imported last
const dirCid = imported.pop()?.cid

if (dirCid == null) {
throw new Error('No directory CID found')
}

// list once in extended (default) mode to learn the child CIDs
const exported = await exporter(dirCid, block)
const dirFiles = await all(exported.content())

// delete shard contents so any attempt to load child blocks must fail
for (const entry of dirFiles) {
await block.delete(entry.cid)
}

// list the contents again, this time just the basic version
const basicDirFiles = await all(exported.content({
extended: false
}))
expect(basicDirFiles.length).to.equal(dirFiles.length)

for (let i = 0; i < basicDirFiles.length; i++) {
const dirFile = basicDirFiles[i]

// basic entries expose only identity fields plus a lazy resolve()
expect(dirFile).to.have.property('name')
expect(dirFile).to.have.property('path')
expect(dirFile).to.have.property('cid')
expect(dirFile).to.have.property('resolve')

// should fail because we have deleted this block
await expect(dirFile.resolve()).to.eventually.be.rejected()
}
})
})
50 changes: 50 additions & 0 deletions packages/ipfs-unixfs-exporter/test/exporter.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1605,4 +1605,54 @@ describe('exporter', () => {

expect(actualInvocations).to.deep.equal(expectedInvocations)
})

// Verifies that listing a flat (non-sharded) directory with
// `extended: false` returns lightweight entries without fetching the child
// blocks - proven by deleting those blocks first and still listing.
it('exports basic directory', async () => {
const files: Record<string, { content: Uint8Array, cid?: CID }> = {}

for (let i = 0; i < 10; i++) {
files[`file-${Math.random()}.txt`] = {
content: uint8ArrayConcat(await all(randomBytes(100)))
}
}

const imported = await all(importer(Object.keys(files).map(path => ({
path,
content: asAsyncIterable(files[path].content)
})), block, {
wrapWithDirectory: true,
rawLeaves: false
}))

// the wrapping directory is imported last
const dirCid = imported.pop()?.cid

if (dirCid == null) {
throw new Error('No directory CID found')
}

// list once in extended (default) mode to learn the child CIDs
const exported = await exporter(dirCid, block)
const dirFiles = await all(exported.content())

// delete the directory's child blocks so resolving them later must fail
for (const entry of dirFiles) {
await block.delete(entry.cid)
}

// list the contents again, this time just the basic version
const basicDirFiles = await all(exported.content({
extended: false
}))
expect(basicDirFiles.length).to.equal(dirFiles.length)

for (let i = 0; i < basicDirFiles.length; i++) {
const dirFile = basicDirFiles[i]

// basic entries expose only identity fields plus a lazy resolve()
expect(dirFile).to.have.property('name')
expect(dirFile).to.have.property('path')
expect(dirFile).to.have.property('cid')
expect(dirFile).to.have.property('resolve')

// should fail because we have deleted this block
await expect(dirFile.resolve()).to.eventually.be.rejected()
}
})
})
Loading