|
| 1 | +/** |
| 2 | + * Get Data Set Pieces |
| 3 | + * |
| 4 | + * Functions for retrieving pieces from a dataset with optional metadata enrichment. |
| 5 | + * |
| 6 | + * @module core/data-set/get-data-set-pieces |
| 7 | + */ |
| 8 | + |
| 9 | +import { METADATA_KEYS, type StorageContext, type Synapse, WarmStorageService } from '@filoz/synapse-sdk' |
| 10 | +import { isStorageContextWithDataSetId } from './type-guards.js' |
| 11 | +import type { |
| 12 | + DataSetPiecesResult, |
| 13 | + GetDataSetPiecesOptions, |
| 14 | + PieceInfo, |
| 15 | + StorageContextWithDataSetId, |
| 16 | + Warning, |
| 17 | +} from './types.js' |
| 18 | + |
| 19 | +/** |
| 20 | + * Get all pieces for a dataset from a StorageContext |
| 21 | + * |
| 22 | + * This function uses the StorageContext.getPieces() async generator to retrieve |
| 23 | + * all pieces in a dataset. Optionally fetches metadata for each piece from WarmStorage. |
| 24 | + * |
| 25 | + * Example usage: |
| 26 | + * ```typescript |
| 27 | + * const result = await getDataSetPieces(storageContext, { |
| 28 | + * includeMetadata: true, |
| 29 | + * batchSize: 100 |
| 30 | + * }) |
| 31 | + * |
| 32 | + * console.log(`Found ${result.pieces.length} pieces`) |
| 33 | + * for (const piece of result.pieces) { |
| 34 | + * console.log(` ${piece.pieceCid}`) |
| 35 | + * if (piece.rootIpfsCid) { |
| 36 | + * console.log(` IPFS: ${piece.rootIpfsCid}`) |
| 37 | + * } |
| 38 | + * } |
| 39 | + * ``` |
| 40 | + * |
| 41 | + * @param storageContext - Storage context from upload or dataset resolution |
| 42 | + * @param options - Optional configuration |
| 43 | + * @returns Pieces and warnings |
| 44 | + */ |
| 45 | +export async function getDataSetPieces( |
| 46 | + synapse: Synapse, |
| 47 | + storageContext: StorageContext, |
| 48 | + options?: GetDataSetPiecesOptions |
| 49 | +): Promise<DataSetPiecesResult> { |
| 50 | + const logger = options?.logger |
| 51 | + const includeMetadata = options?.includeMetadata ?? false |
| 52 | + const signal = options?.signal |
| 53 | + |
| 54 | + if (!isStorageContextWithDataSetId(storageContext)) { |
| 55 | + throw new Error('Storage context does not have a dataset ID') |
| 56 | + } |
| 57 | + |
| 58 | + const pieces: PieceInfo[] = [] |
| 59 | + const warnings: Warning[] = [] |
| 60 | + |
| 61 | + // Use the async generator to fetch all pieces |
| 62 | + try { |
| 63 | + const getPiecesOptions = { ...(signal && { signal }) } |
| 64 | + for await (const piece of storageContext.getPieces(getPiecesOptions)) { |
| 65 | + const pieceInfo: PieceInfo = { |
| 66 | + pieceId: piece.pieceId, |
| 67 | + pieceCid: piece.pieceCid.toString(), |
| 68 | + } |
| 69 | + |
| 70 | + pieces.push(pieceInfo) |
| 71 | + } |
| 72 | + } catch (error) { |
| 73 | + // If getPieces fails completely, throw - this is a critical error |
| 74 | + logger?.error({ dataSetId: storageContext.dataSetId, error }, 'Failed to retrieve pieces from dataset') |
| 75 | + throw new Error(`Failed to retrieve pieces for dataset ${storageContext.dataSetId}: ${String(error)}`) |
| 76 | + } |
| 77 | + |
| 78 | + // Optionally enrich with metadata |
| 79 | + if (includeMetadata && pieces.length > 0) { |
| 80 | + await enrichPiecesWithMetadata(synapse, storageContext, pieces, warnings, logger) |
| 81 | + } |
| 82 | + |
| 83 | + return { |
| 84 | + pieces, |
| 85 | + dataSetId: storageContext.dataSetId, |
| 86 | + warnings, |
| 87 | + } |
| 88 | +} |
| 89 | + |
| 90 | +/** |
| 91 | + * Internal helper: Enrich pieces with metadata from WarmStorage |
| 92 | + * |
| 93 | + * This function fetches metadata for each piece and extracts: |
| 94 | + * - rootIpfsCid (from METADATA_KEYS.IPFS_ROOT_CID) |
| 95 | + * - Full metadata object |
| 96 | + * |
| 97 | + * Non-fatal errors are added to the warnings array. |
| 98 | + */ |
| 99 | +async function enrichPiecesWithMetadata( |
| 100 | + synapse: Synapse, |
| 101 | + storageContext: StorageContextWithDataSetId, |
| 102 | + pieces: PieceInfo[], |
| 103 | + warnings: Warning[], |
| 104 | + logger?: GetDataSetPiecesOptions['logger'] |
| 105 | +): Promise<void> { |
| 106 | + const dataSetId = storageContext.dataSetId |
| 107 | + |
| 108 | + // Create WarmStorage service instance |
| 109 | + let warmStorage: WarmStorageService |
| 110 | + try { |
| 111 | + warmStorage = await WarmStorageService.create(synapse.getProvider(), synapse.getWarmStorageAddress()) |
| 112 | + } catch (error) { |
| 113 | + // If we can't create the service, warn and return |
| 114 | + logger?.warn({ error }, 'Failed to create WarmStorageService for metadata enrichment') |
| 115 | + warnings.push({ |
| 116 | + code: 'WARM_STORAGE_INIT_FAILED', |
| 117 | + message: 'Failed to initialize WarmStorageService for metadata enrichment', |
| 118 | + context: { error: String(error) }, |
| 119 | + }) |
| 120 | + return |
| 121 | + } |
| 122 | + |
| 123 | + // Fetch metadata for each piece |
| 124 | + for (const piece of pieces) { |
| 125 | + try { |
| 126 | + const metadata = await warmStorage.getPieceMetadata(dataSetId, piece.pieceId) |
| 127 | + |
| 128 | + // Extract root IPFS CID if available |
| 129 | + const rootIpfsCid = metadata[METADATA_KEYS.IPFS_ROOT_CID] |
| 130 | + if (rootIpfsCid) { |
| 131 | + piece.rootIpfsCid = rootIpfsCid |
| 132 | + } |
| 133 | + |
| 134 | + // Store full metadata |
| 135 | + piece.metadata = metadata |
| 136 | + } catch (error) { |
| 137 | + // Non-fatal: piece exists but metadata fetch failed |
| 138 | + logger?.warn( |
| 139 | + { |
| 140 | + dataSetId, |
| 141 | + pieceId: piece.pieceId, |
| 142 | + error, |
| 143 | + }, |
| 144 | + 'Failed to fetch metadata for piece' |
| 145 | + ) |
| 146 | + |
| 147 | + warnings.push({ |
| 148 | + code: 'METADATA_FETCH_FAILED', |
| 149 | + message: `Failed to fetch metadata for piece ${piece.pieceId}`, |
| 150 | + context: { |
| 151 | + pieceId: piece.pieceId, |
| 152 | + dataSetId, |
| 153 | + error: String(error), |
| 154 | + }, |
| 155 | + }) |
| 156 | + } |
| 157 | + } |
| 158 | +} |
0 commit comments