Skip to content

Commit 6d81508

Browse files
SgtPooki and Copilot authored
feat: add core/data-set methods (#210)
* feat: add core/data-set methods * Update src/core/data-set/types.ts Co-authored-by: Copilot <[email protected]> * Update src/core/data-set/get-data-set-pieces.ts Co-authored-by: Copilot <[email protected]> * Update src/core/data-set/get-data-set-pieces.ts Co-authored-by: Copilot <[email protected]> * fix: remove batchSize from core/data-set * fix: add createdWithFilecoinPin to DataSetSummary also adds some jsdoc comments explaining that metadata does exist * fix: call sp-registry for specific data-set providers * test: cleanup core/data-set unit tests --------- Co-authored-by: Copilot <[email protected]>
1 parent f46eab4 commit 6d81508

File tree

10 files changed

+829
-17
lines changed

10 files changed

+829
-17
lines changed

package.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@
2626
"types": "./dist/core/car/index.d.ts",
2727
"default": "./dist/core/car/index.js"
2828
},
29+
"./core/data-set": {
30+
"types": "./dist/core/data-set/index.d.ts",
31+
"default": "./dist/core/data-set/index.js"
32+
},
2933
"./core/payments": {
3034
"types": "./dist/core/payments/index.d.ts",
3135
"default": "./dist/core/payments/index.js"
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
/**
2+
* Get Data Set Pieces
3+
*
4+
* Functions for retrieving pieces from a dataset with optional metadata enrichment.
5+
*
6+
* @module core/data-set/get-data-set-pieces
7+
*/
8+
9+
import { METADATA_KEYS, type StorageContext, type Synapse, WarmStorageService } from '@filoz/synapse-sdk'
10+
import { isStorageContextWithDataSetId } from './type-guards.js'
11+
import type {
12+
DataSetPiecesResult,
13+
GetDataSetPiecesOptions,
14+
PieceInfo,
15+
StorageContextWithDataSetId,
16+
Warning,
17+
} from './types.js'
18+
19+
/**
 * Get all pieces for a dataset from a StorageContext
 *
 * Drains the StorageContext.getPieces() async generator to collect every piece
 * in the dataset. When `includeMetadata` is set and at least one piece was found,
 * per-piece metadata is then fetched from WarmStorage; failures during that step
 * are reported through the returned `warnings` instead of being thrown.
 *
 * The abort signal, when provided, is forwarded to getPieces() only.
 *
 * Example usage:
 * ```typescript
 * const result = await getDataSetPieces(synapse, storageContext, {
 *   includeMetadata: true,
 * })
 *
 * console.log(`Found ${result.pieces.length} pieces`)
 * for (const piece of result.pieces) {
 *   console.log(`  ${piece.pieceCid}`)
 *   if (piece.rootIpfsCid) {
 *     console.log(`    IPFS: ${piece.rootIpfsCid}`)
 *   }
 * }
 * ```
 *
 * @param synapse - Synapse instance, used to reach WarmStorage for metadata enrichment
 * @param storageContext - Storage context from upload or dataset resolution; must carry a numeric dataSetId
 * @param options - Optional configuration (metadata enrichment, abort signal, logger)
 * @returns Pieces, the dataset ID they belong to, and any non-fatal warnings
 * @throws Error if the storage context has no dataset ID, or if piece retrieval fails
 *   (the underlying error is attached as `cause`)
 */
export async function getDataSetPieces(
  synapse: Synapse,
  storageContext: StorageContext,
  options?: GetDataSetPiecesOptions
): Promise<DataSetPiecesResult> {
  const logger = options?.logger
  const includeMetadata = options?.includeMetadata ?? false
  const signal = options?.signal

  if (!isStorageContextWithDataSetId(storageContext)) {
    throw new Error('Storage context does not have a dataset ID')
  }

  const pieces: PieceInfo[] = []
  const warnings: Warning[] = []

  // Use the async generator to fetch all pieces
  try {
    // Only pass `signal` along when the caller actually supplied one
    const getPiecesOptions = signal ? { signal } : {}
    for await (const piece of storageContext.getPieces(getPiecesOptions)) {
      pieces.push({
        pieceId: piece.pieceId,
        pieceCid: piece.pieceCid.toString(),
      })
    }
  } catch (error) {
    // If getPieces fails completely, throw - this is a critical error
    logger?.error({ dataSetId: storageContext.dataSetId, error }, 'Failed to retrieve pieces from dataset')
    const failure = new Error(`Failed to retrieve pieces for dataset ${storageContext.dataSetId}: ${String(error)}`)
    // Keep the original error (stack, type) reachable for callers without relying on
    // the ES2022 `new Error(message, { cause })` overload being available.
    Object.defineProperty(failure, 'cause', {
      value: error,
      writable: true,
      configurable: true,
      enumerable: false,
    })
    throw failure
  }

  // Optionally enrich with metadata; skipped when there is nothing to enrich
  if (includeMetadata && pieces.length > 0) {
    await enrichPiecesWithMetadata(synapse, storageContext, pieces, warnings, logger)
  }

  return {
    pieces,
    dataSetId: storageContext.dataSetId,
    warnings,
  }
}
89+
90+
/**
 * Internal helper: attach WarmStorage metadata to each piece, in place
 *
 * For every entry in `pieces` this looks up the piece metadata and sets:
 * - rootIpfsCid (from METADATA_KEYS.IPFS_ROOT_CID, only when that value is truthy)
 * - metadata (the full metadata object)
 *
 * Nothing here throws for a failed lookup. If the WarmStorage service cannot be
 * created, a single warning is recorded and no piece is touched. If the lookup
 * for one piece fails, a warning is recorded for that piece and the loop moves on.
 * Pieces are processed one after another, in array order.
 */
async function enrichPiecesWithMetadata(
  synapse: Synapse,
  storageContext: StorageContextWithDataSetId,
  pieces: PieceInfo[],
  warnings: Warning[],
  logger?: GetDataSetPiecesOptions['logger']
): Promise<void> {
  const { dataSetId } = storageContext

  // Without a service instance there is nothing to enrich with: warn and bail out
  let service: WarmStorageService
  try {
    service = await WarmStorageService.create(synapse.getProvider(), synapse.getWarmStorageAddress())
  } catch (initError) {
    logger?.warn({ error: initError }, 'Failed to create WarmStorageService for metadata enrichment')
    warnings.push({
      code: 'WARM_STORAGE_INIT_FAILED',
      message: 'Failed to initialize WarmStorageService for metadata enrichment',
      context: { error: String(initError) },
    })
    return
  }

  for (const target of pieces) {
    try {
      const fetched = await service.getPieceMetadata(dataSetId, target.pieceId)

      // Surface the root IPFS CID as a first-class field when the metadata carries one
      const ipfsRoot = fetched[METADATA_KEYS.IPFS_ROOT_CID]
      if (ipfsRoot) {
        target.rootIpfsCid = ipfsRoot
      }

      target.metadata = fetched
    } catch (fetchError) {
      // Non-fatal: the piece itself is still reported, just without metadata
      logger?.warn({ dataSetId, pieceId: target.pieceId, error: fetchError }, 'Failed to fetch metadata for piece')

      warnings.push({
        code: 'METADATA_FETCH_FAILED',
        message: `Failed to fetch metadata for piece ${target.pieceId}`,
        context: {
          pieceId: target.pieceId,
          dataSetId,
          error: String(fetchError),
        },
      })
    }
  }
}

src/core/data-set/index.ts

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/**
2+
* Data Set Core Module
3+
*
4+
* This module provides reusable functions for working with Filecoin data-sets
5+
* and pieces. It wraps synapse-sdk methods to provide a clean API that abstracts
6+
* away WarmStorageService and PDPServer implementation details.
7+
*
8+
* Key features:
9+
* - List datasets with optional provider enrichment
10+
* - Get pieces from a StorageContext with optional metadata
11+
* - Graceful error handling with structured warnings
12+
* - Clean separation of concerns (follows SOLID principles)
13+
*
14+
* @module core/data-set
15+
*/
16+
17+
export * from './get-data-set-pieces.js'
18+
export * from './list-data-sets.js'
19+
export * from './types.js'
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/**
2+
* List Data Sets
3+
*
4+
* Functions for listing and summarizing datasets with optional provider enrichment.
5+
*
6+
* @module core/data-set/list-data-sets
7+
*/
8+
9+
import type { ProviderInfo, Synapse } from '@filoz/synapse-sdk'
10+
import { SPRegistryService } from '@filoz/synapse-sdk/sp-registry'
11+
import { DEFAULT_DATA_SET_METADATA } from '../synapse/constants.js'
12+
import type { DataSetSummary, ListDataSetsOptions } from './types.js'
13+
14+
/**
 * List all datasets for an address, enriched with provider info when available
 *
 * Provider details are requested from sp-registry for just the provider IDs that
 * the returned datasets reference. That lookup is best-effort: if it throws, a
 * warning is logged and every summary comes back with `provider` undefined.
 *
 * Example usage:
 * ```typescript
 * const synapse = await Synapse.create({ privateKey, rpcURL })
 * const datasets = await listDataSets(synapse)
 *
 * for (const ds of datasets) {
 *   console.log(`Dataset ${ds.dataSetId}: ${ds.currentPieceCount} pieces`)
 *   if (ds.provider) {
 *     console.log(`  Provider: ${ds.provider.name}`)
 *   }
 * }
 * ```
 *
 * @param synapse - Initialized Synapse instance
 * @param options - Optional configuration (address override, logger)
 * @returns Array of dataset summaries
 */
export async function listDataSets(synapse: Synapse, options?: ListDataSetsOptions): Promise<DataSetSummary[]> {
  const logger = options?.logger
  // Fall back to the Synapse client's own address when none is given
  const owner = options?.address ?? (await synapse.getClient().getAddress())

  const found = await synapse.storage.findDataSets(owner)

  // De-duplicate provider IDs so each provider is requested at most once
  const providerIds = [...new Set(found.map((entry) => entry.providerId))]

  let providersById = new Map<number, ProviderInfo>()
  if (providerIds.length !== 0) {
    try {
      // NOTE(review): the second argument passed here is synapse.getNetwork(); confirm this
      // matches what the SPRegistryService constructor expects in the pinned synapse-sdk version.
      const registry = new SPRegistryService(synapse.getProvider(), synapse.getNetwork())
      const infos = await registry.getProviders(providerIds)
      providersById = new Map(infos.map((info): [number, ProviderInfo] => [info.id, info]))
    } catch (error) {
      logger?.warn({ error }, 'Failed to fetch provider info from sp-registry for provider enrichment')
    }
  }

  // A dataset counts as created by filecoin-pin when it carries every default key/value pair
  const pinMarkers = Object.entries(DEFAULT_DATA_SET_METADATA)

  const summaries: DataSetSummary[] = []
  for (const entry of found) {
    const createdWithFilecoinPin = pinMarkers.every(([key, expected]) => entry.metadata[key] === expected)

    // Keep every SDK field and add the dataSetId alias, provider, and creation flag
    summaries.push({
      ...entry,
      dataSetId: entry.pdpVerifierDataSetId,
      provider: providersById.get(entry.providerId),
      createdWithFilecoinPin,
    })
  }
  return summaries
}

src/core/data-set/type-guards.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
import type { StorageContext } from '@filoz/synapse-sdk'
2+
import type { StorageContextWithDataSetId } from './types.js'
3+
4+
/**
 * Type guard: checks at runtime that `value` is a non-null object with a
 * `dataSetId` property whose type is `number`.
 *
 * @param value - Storage context to inspect
 * @returns true when `value.dataSetId` is a number, narrowing `value` accordingly
 */
export function isStorageContextWithDataSetId(value: StorageContext): value is StorageContextWithDataSetId {
  if (typeof value !== 'object' || value === null) {
    return false
  }
  if (!('dataSetId' in value)) {
    return false
  }
  return typeof value.dataSetId === 'number'
}

src/core/data-set/types.ts

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
/**
2+
* Data Set Types
3+
*
4+
* Type definitions for working with Filecoin data-sets and pieces.
5+
* These types wrap synapse-sdk primitives to provide a consistent
6+
* interface for querying and enriching dataset information.
7+
*
8+
* @module core/data-set/types
9+
*/
10+
11+
import type { EnhancedDataSetInfo, ProviderInfo, StorageContext } from '@filoz/synapse-sdk'
12+
import type { Logger } from 'pino'
13+
14+
/**
 * A single piece belonging to a dataset
 */
export interface PieceInfo {
  /** Identifier of the piece, unique within its dataset */
  pieceId: number
  /** Piece CID (CommP) in string form */
  pieceCid: string
  /** Root IPFS CID taken from the piece metadata, when present */
  rootIpfsCid?: string
  /** Size of the piece in bytes, when known */
  size?: number
  /** Raw piece metadata as key-value pairs */
  metadata?: Record<string, string>
}
29+
30+
/**
 * What a piece lookup for one dataset returns
 */
export interface DataSetPiecesResult {
  /** Pieces found in the dataset */
  pieces: PieceInfo[]
  /** ID of the dataset the pieces belong to */
  dataSetId: number
  /** Problems that did not stop retrieval (for example a failed metadata lookup) */
  warnings?: Warning[]
}
41+
42+
/**
 * A non-fatal issue, reported as data instead of being thrown
 */
export interface Warning {
  /** Stable code meant for programmatic handling (e.g., 'METADATA_FETCH_FAILED') */
  code: string
  /** Description meant for people */
  message: string
  /** Extra details about where the issue occurred (e.g., { pieceId: 123, dataSetId: 456 }) */
  context?: Record<string, unknown>
}
53+
54+
/**
 * Summary information for a dataset
 *
 * Every field of the synapse-sdk EnhancedDataSetInfo is carried through, which
 * covers what both the CLI and website need:
 * - Rail IDs (pdpRailId, cdnRailId, cacheMissRailId)
 * - Contract details (commissionBps, pdpEndEpoch, cdnEndEpoch)
 * - Piece tracking (nextPieceId, currentPieceCount)
 * - Dataset metadata (EnhancedDataSetInfo.metadata - key-value pairs from WarmStorage)
 *
 * On top of that this type adds the dataSetId alias (a more discoverable name for
 * pdpVerifierDataSetId), provider enrichment, and the filecoin-pin creation flag.
 */
export interface DataSetSummary extends EnhancedDataSetInfo {
  /** PDP Verifier dataset ID (alias for pdpVerifierDataSetId) */
  dataSetId: number
  /** Provider information from the sp-registry lookup; undefined when that lookup failed or had no entry for this provider */
  provider: ProviderInfo | undefined
  /** True when the dataset metadata contains every default filecoin-pin key/value pair (WITH_IPFS_INDEXING and source='filecoin-pin') */
  createdWithFilecoinPin: boolean
}
76+
77+
/**
 * Optional settings accepted when listing datasets
 */
export interface ListDataSetsOptions {
  /** Owner address to query; the synapse client address is used when omitted */
  address?: string
  /** Logger that receives diagnostics, such as a failed provider lookup */
  logger?: Logger
}
86+
87+
/**
 * Optional settings accepted when getting the pieces of a dataset
 */
export interface GetDataSetPiecesOptions {
  /** When true, piece metadata is also fetched from WarmStorage (defaults to false) */
  includeMetadata?: boolean
  /** Abort signal used to cancel piece retrieval */
  signal?: AbortSignal
  /** Logger that receives diagnostics about retrieval and enrichment failures */
  logger?: Logger
}
98+
99+
/**
 * A StorageContext whose dataSetId is known to be a number.
 */
export type StorageContextWithDataSetId = StorageContext & { dataSetId: number }

src/core/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
export * from './car/index.js'
2+
export * from './data-set/index.js'
23
export * from './payments/index.js'
34
export * from './synapse/index.js'
45
export * from './unixfs/index.js'

src/core/synapse/constants.ts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import { METADATA_KEYS } from '@filoz/synapse-sdk'
2+
3+
/**
 * Metadata applied by default to Synapse data sets that filecoin-pin creates
 */
export const DEFAULT_DATA_SET_METADATA = {
  // Enable IPFS indexing for all data sets (the value is an empty string)
  [METADATA_KEYS.WITH_IPFS_INDEXING]: '',
  // Identify the source application
  source: 'filecoin-pin',
} as const

/**
 * Configuration used by default when creating storage contexts
 */
export const DEFAULT_STORAGE_CONTEXT_CONFIG = {
  // Always filter for IPNI-enabled providers for IPFS indexing
  withIpni: true,
  metadata: DEFAULT_DATA_SET_METADATA,
} as const

0 commit comments

Comments
 (0)