-
Notifications
You must be signed in to change notification settings - Fork 15
fix: get pieces from contract instead of pdpServer #249
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 6 commits
cf89ea3
96d6a5a
4c5e0a6
2018a6e
aaccbe1
5972649
caaf368
80bd959
b8a4028
d7ab36e
732a91f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,6 +6,7 @@ | |
|
|
||
| import type { LegacyPieceLink as LegacyPieceCIDType, PieceLink as PieceCIDType } from '@web3-storage/data-segment' | ||
| import * as Hasher from '@web3-storage/data-segment/multihash' | ||
| import { fromLink } from '@web3-storage/data-segment/piece' | ||
| import { CID } from 'multiformats/cid' | ||
| import * as Raw from 'multiformats/codecs/raw' | ||
| import * as Digest from 'multiformats/hashes/digest' | ||
|
|
@@ -164,6 +165,47 @@ export function calculate(data: Uint8Array): PieceCID { | |
| return Link.create(Raw.code, digest) | ||
| } | ||
|
|
||
/**
 * Read the merkle-tree leaf count encoded in a PieceCID v2.
 *
 * The input is normalised through `asPieceCID` first. Anything that is not
 * accepted there, or that `fromLink` fails to decode, yields `null` instead of
 * an exception.
 *
 * @param pieceCid - The PieceCID (typed link, generic CID, or string) to inspect
 * @returns `2 ** height` of the decoded piece, or `null` if the input is not a usable PieceCID
 */
export function getLeafCount(pieceCid: PieceCID | CID | string): number | null {
  const parsed = asPieceCID(pieceCid)

  if (parsed) {
    try {
      // A tree of the decoded height has 2^height leaves
      const { height } = fromLink(parsed)
      return 2 ** height
    } catch {
      // Decoding failed: fall through to the shared null result
    }
  }

  return null
}
|
|
||
/**
 * Extract the raw (unpadded payload) size from a PieceCID v2.
 *
 * A PieceCID v2 digest carries both the merkle tree height and the number of
 * zero-padding bytes that were appended to the payload before Fr32 expansion.
 * The raw size is therefore recovered as:
 *
 *   paddedPieceSize = 32 * 2^height            (one 32-byte node per leaf)
 *   payloadCapacity = paddedPieceSize * 127/128 (Fr32 stores 127 bytes per 128)
 *   rawSize         = payloadCapacity - padding
 *
 * Returning `2^height * 32` alone would report the padded piece size, which
 * over-states the amount of user data for every piece.
 *
 * @param pieceCid - The PieceCID to extract raw size from
 * @returns The raw size in bytes, or null if the input is not a valid PieceCID
 *   or its encoded padding exceeds the capacity implied by its height
 */
export function getRawSize(pieceCid: PieceCID | CID | string): number | null {
  const validPieceCid = asPieceCID(pieceCid)
  if (!validPieceCid) {
    return null
  }

  try {
    const piece = fromLink(validPieceCid)

    // Work in bigint: padding is decoded as a bigint and heights above 48
    // would overflow exact integer arithmetic on plain numbers.
    const paddedPieceSize = BigInt(32) << BigInt(piece.height)
    const payloadCapacity = (paddedPieceSize * BigInt(127)) / BigInt(128)
    const rawSize = payloadCapacity - BigInt(piece.padding)

    // More padding than capacity means the digest is internally inconsistent
    if (rawSize < BigInt(0)) {
      return null
    }

    return Number(rawSize)
  } catch {
    return null
  }
}
|
|
||
| /** | ||
| * Create a TransformStream that calculates PieceCID while streaming data through it | ||
| * This allows calculating PieceCID without buffering the entire data in memory | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,14 +22,18 @@ | |
| * ``` | ||
| */ | ||
|
|
||
| import type { ethers } from 'ethers' | ||
| import { ethers } from 'ethers' | ||
| import { CID } from 'multiformats/cid' | ||
| import type { PaymentsService } from '../payments/index.ts' | ||
| import { PDPAuthHelper, PDPServer } from '../pdp/index.ts' | ||
| import { asPieceCID } from '../piece/index.ts' | ||
| import { PDPVerifier } from '../pdp/verifier.ts' | ||
| import { asPieceCID, getLeafCount, getRawSize } from '../piece/index.ts' | ||
| import { SPRegistryService } from '../sp-registry/index.ts' | ||
| import type { ProviderInfo } from '../sp-registry/types.ts' | ||
| import type { Synapse } from '../synapse.ts' | ||
| import type { | ||
| DataSetPieceData, | ||
| DataSetPieceDataWithLeafCount, | ||
| DownloadOptions, | ||
| EnhancedDataSetInfo, | ||
| MetadataEntry, | ||
|
|
@@ -1260,6 +1264,116 @@ export class StorageContext { | |
| return dataSetData.pieces.map((piece) => piece.pieceCid) | ||
| } | ||
|
|
||
/**
 * Collect every active piece of this data set together with size details.
 *
 * Pieces come from `getAllActivePiecesGenerator`; `leafCount` and `rawSize` are
 * derived locally from each PieceCID via `getLeafCount` / `getRawSize`, and
 * fall back to 0 when those helpers return null.
 *
 * @param options - Optional configuration forwarded unchanged to the generator
 * @param options.batchSize - The batch size for each pagination call
 * @param options.signal - Optional AbortSignal to cancel the operation
 * @returns Array of pieces, each extended with `rawSize` and `leafCount`
 */
async getPiecesWithDetails(options?: {
  batchSize?: number
  signal?: AbortSignal
}): Promise<DataSetPieceDataWithLeafCount[]> {
  const detailed: DataSetPieceDataWithLeafCount[] = []

  for await (const { pieceId, pieceCid } of this.getAllActivePiecesGenerator(options)) {
    // TODO: should we call the contract for leaf count? i.e. pdpVerifier.getPieceLeafCount(this._dataSetId, piece.pieceId)
    const leafCount = getLeafCount(pieceCid) ?? 0
    // TODO: is there a better way to get the raw size?
    const rawSize = getRawSize(pieceCid) ?? 0

    const entry = {
      pieceId,
      pieceCid,
      rawSize,
      leafCount,
      subPieceCid: pieceCid,
      subPieceOffset: 0, // TODO: figure out how to get the sub piece offset
    } satisfies DataSetPieceDataWithLeafCount

    detailed.push(entry)
  }

  return detailed
}
|
|
||
/**
 * Eagerly collect all active pieces for this data set into an array.
 *
 * This drains `getAllActivePiecesGenerator`, which reads the piece list from
 * the PDPVerifier contract, and copies each yielded piece into a fresh object.
 * @param options - Optional configuration object
 * @param options.batchSize - The batch size for each pagination call (default: 100)
 * @param options.signal - Optional AbortSignal to cancel the operation
 * @returns Array of all active pieces with their details including PieceCID
 */
async getAllActivePieces(options?: { batchSize?: number; signal?: AbortSignal }): Promise<Array<DataSetPieceData>> {
  const collected: Array<DataSetPieceData> = []

  for await (const { pieceId, pieceCid } of this.getAllActivePiecesGenerator(options)) {
    const entry = {
      pieceId,
      pieceCid,
      subPieceCid: pieceCid,
      subPieceOffset: 0, // TODO: figure out how to get the sub piece offset
    } satisfies DataSetPieceData

    collected.push(entry)
  }

  return collected
}
|
|
||
/**
 * Get all active pieces for this data set as an async generator.
 * This provides lazy evaluation and better memory efficiency for large data sets.
 * Gets data directly from PDPVerifier contract (source of truth) rather than Curio.
 *
 * Pages are requested with `{ offset, limit: batchSize }`; the offset advances by
 * the number of pieces actually returned, and iteration stops when the contract
 * reports no more pieces or returns an empty page.
 *
 * @param options - Optional configuration object
 * @param options.batchSize - The batch size for each pagination call (default: 100); must be a positive integer
 * @param options.signal - Optional AbortSignal to cancel the operation
 * @yields Individual pieces with their details including PieceCID
 * @throws If `batchSize` is not a positive integer, if the signal is aborted,
 *   or if the contract returns bytes that are not a valid PieceCID
 */
async *getAllActivePiecesGenerator(options?: {
  batchSize?: number
  signal?: AbortSignal
}): AsyncGenerator<DataSetPieceData> {
  const batchSize = options?.batchSize ?? 100
  // Reject nonsensical page sizes up front instead of sending them to the contract
  if (!Number.isInteger(batchSize) || batchSize <= 0) {
    throw createError(
      'StorageContext',
      'getAllActivePiecesGenerator',
      `batchSize must be a positive integer, got ${batchSize}`
    )
  }

  const signal = options?.signal
  const assertNotAborted = (): void => {
    if (signal?.aborted) {
      throw createError('StorageContext', 'getAllActivePiecesGenerator', 'Operation aborted')
    }
  }

  const pdpVerifierAddress = this._warmStorageService.getPDPVerifierAddress()
  const pdpVerifier = new PDPVerifier(this._synapse.getProvider(), pdpVerifierAddress)

  let offset = 0
  let hasMore = true

  while (hasMore) {
    assertNotAborted()

    const result = await pdpVerifier.getActivePieces(this._dataSetId, { offset, limit: batchSize, signal })
    const pageLength = result.pieces.length

    // Yield pieces one by one for lazy evaluation
    for (let i = 0; i < pageLength; i++) {
      assertNotAborted()

      const pieceId = result.pieceIds[i]

      // Parse the piece data as a PieceCID
      // The contract stores the full PieceCID multihash digest (including height and padding)
      // The data comes as a hex string from ethers, we need to decode it as bytes then as a CID
      let pieceCid: ReturnType<typeof asPieceCID> = null
      let decodeFailure = ''
      try {
        const pieceDataBytes = ethers.getBytes(result.pieces[i].data)
        pieceCid = asPieceCID(CID.decode(pieceDataBytes))
      } catch (error) {
        // Surface malformed bytes through the same error path as a non-PieceCID CID,
        // so the failing piece ID is always reported
        decodeFailure = `: ${error instanceof Error ? error.message : String(error)}`
      }

      if (!pieceCid) {
        throw createError(
          'StorageContext',
          'getAllActivePiecesGenerator',
          `Invalid PieceCID returned from contract for piece ${pieceId}${decodeFailure}`
        )
      }

      yield {
        pieceId,
        pieceCid,
        subPieceCid: pieceCid,
        subPieceOffset: 0, // TODO: figure out how to get the sub piece offset
      } satisfies DataSetPieceData
    }

    // Advance by what was actually received so a short page can never skip pieces,
    // and stop on an empty page so a stuck `hasMore` flag cannot loop forever
    hasMore = result.hasMore && pageLength > 0
    offset += pageLength
  }
}
|
|
||
| /** | ||
| * Check if a piece exists on this service provider. | ||
| * @param pieceCid - The PieceCID (piece CID) to check | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,7 +8,15 @@ import type { API } from '@web3-storage/data-segment' | |
| import { Size, toLink } from '@web3-storage/data-segment/piece' | ||
| import { assert } from 'chai' | ||
| import { CID } from 'multiformats/cid' | ||
| import { asLegacyPieceCID, asPieceCID, calculate, createPieceCIDStream, type PieceCID } from '../piece/index.ts' | ||
| import { | ||
| asLegacyPieceCID, | ||
| asPieceCID, | ||
| calculate, | ||
| createPieceCIDStream, | ||
| getLeafCount, | ||
| getRawSize, | ||
| type PieceCID, | ||
| } from '../piece/index.ts' | ||
|
|
||
| // https://github.com/filecoin-project/go-fil-commp-hashhash/blob/master/testdata/zero.txt | ||
| const zeroPieceCidFixture = ` | ||
|
|
@@ -246,4 +254,57 @@ describe('PieceCID utilities', () => { | |
| // more complex async coordination, so we keep this test simple | ||
| }) | ||
| }) | ||
|
|
||
// getLeafCount: every zero-data fixture must report 2^height leaves, and
// inputs that are not PieceCIDs must produce null.
describe('getLeafCount', () => {
  for (const [size, , v1] of zeroPieceCidFixture) {
    it(`should extract correct leaf count for size ${size}`, () => {
      const pieceCidV2 = toPieceCID(BigInt(size), v1)

      // Expected leaf count is 2^height where height is calculated from size
      const height = Size.Unpadded.toHeight(BigInt(size))
      const expected = 2 ** height

      const actual = getLeafCount(pieceCidV2)
      assert.isNotNull(actual)
      assert.strictEqual(actual, expected)
    })
  }

  it('should return null for invalid PieceCID', () => {
    assert.isNull(getLeafCount(invalidCidString))
  })

  it('should return null for null input', () => {
    assert.isNull(getLeafCount(null as any))
  })
})
|
|
||
// getRawSize: checked against the fixture's own payload size rather than a
// re-derivation of the implementation formula, so the test can fail
// independently of how the size is computed.
describe('getRawSize', () => {
  for (const [size, , v1] of zeroPieceCidFixture) {
    it(`should extract correct raw size for size ${size}`, () => {
      const v2 = toPieceCID(BigInt(size), v1)
      const rawSize = getRawSize(v2)

      // Upper bound: the padded piece size, i.e. 32 bytes per merkle leaf
      const expectedHeight = Size.Unpadded.toHeight(BigInt(size))
      const paddedPieceSize = 2 ** expectedHeight * 32

      assert.isNotNull(rawSize)
      // The reported size must be able to hold the payload the piece was built from...
      assert.isAtLeast(rawSize as number, Number(size))
      // ...and can never exceed the padded size of the piece itself
      assert.isAtMost(rawSize as number, paddedPieceSize)
    })
  }

  it('should return null for invalid PieceCID', () => {
    const result = getRawSize(invalidCidString)
    assert.isNull(result)
  })

  it('should return null for null input', () => {
    const result = getRawSize(null as any)
    assert.isNull(result)
  })
})
| }) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Callout here: I'm not sure whether the leafCount calculation in piece.ts is sufficient.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is OK, but I don't think it's all that useful here. Let's remove it for now and ignore leaf counts; they mostly shouldn't be a concern to the user, other than their fairly close relationship to size.