Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions src/pdp/verifier.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,18 @@ export class PDPVerifier {
return Number(leafCount)
}

/**
* Get the leaf count for a specific piece by calling `getPieceLeafCount` on the
* underlying contract instance.
* @param dataSetId - The PDPVerifier data set ID
* @param pieceId - The piece ID within the data set
* @returns The number of leaves for this piece, converted to a JS number
*/
async getPieceLeafCount(dataSetId: number, pieceId: number): Promise<number> {
// TODO: do we need to call the contract for the leaf count at all?
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

callout here.. not sure if piece.ts leafCount calculation is enough?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is ok, but I don't think it's all that useful here; let's remove it for now and ignore leaf counts -- they mostly shouldn't be a concern to the user other than their fairly close relationship to size

const leafCount = await this._contract.getPieceLeafCount(dataSetId, pieceId)
// Convert the value returned by the contract call into a plain JS number
return Number(leafCount)
}

/**
* Extract data set ID from a transaction receipt by looking for DataSetCreated events
* @param receipt - Transaction receipt
Expand Down Expand Up @@ -113,6 +125,45 @@ export class PDPVerifier {
}
}

/**
 * Get one page of active pieces for a data set.
 *
 * The contract call itself cannot be interrupted, so the abort signal is
 * checked both before the call is issued and again once it resolves; a
 * cancellation that arrives while the call is in flight therefore still
 * rejects instead of handing back results the caller no longer wants.
 *
 * @param dataSetId - The PDPVerifier data set ID
 * @param options - Optional configuration object
 * @param options.offset - The offset to start from (default: 0)
 * @param options.limit - The maximum number of pieces to return (default: 100)
 * @param options.signal - Optional AbortSignal to cancel the operation
 * @returns Object containing pieces, piece IDs, raw sizes, and hasMore flag
 * @throws Error with message 'Operation aborted' when the signal is aborted
 *   before the call starts or by the time it completes
 */
async getActivePieces(
  dataSetId: number,
  options?: {
    offset?: number
    limit?: number
    signal?: AbortSignal
  }
): Promise<{
  pieces: Array<{ data: string }>
  pieceIds: number[]
  rawSizes: number[]
  hasMore: boolean
}> {
  const offset = options?.offset ?? 0
  const limit = options?.limit ?? 100
  const signal = options?.signal

  // Fail fast: do not issue the call when the caller has already cancelled
  if (signal?.aborted) {
    throw new Error('Operation aborted')
  }

  const result = await this._contract.getActivePieces(dataSetId, offset, limit)

  // Re-check after the await so an abort raised during the call is honoured
  if (signal?.aborted) {
    throw new Error('Operation aborted')
  }

  return {
    // Keep only the `data` field of each returned piece struct
    pieces: result[0].map((piece: { data: string }) => ({ data: piece.data })),
    // IDs and sizes arrive as bigint and are narrowed to JS numbers
    pieceIds: result[1].map((id: bigint) => Number(id)),
    rawSizes: result[2].map((size: bigint) => Number(size)),
    hasMore: result[3],
  }
}

/**
* Get the PDPVerifier contract address for the current network
*/
Expand Down
2 changes: 2 additions & 0 deletions src/piece/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ export {
asPieceCID,
calculate,
createPieceCIDStream,
getLeafCount,
getRawSize,
type LegacyPieceCID,
type PieceCID,
} from './piece.ts'
42 changes: 42 additions & 0 deletions src/piece/piece.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import type { LegacyPieceLink as LegacyPieceCIDType, PieceLink as PieceCIDType } from '@web3-storage/data-segment'
import * as Hasher from '@web3-storage/data-segment/multihash'
import { fromLink } from '@web3-storage/data-segment/piece'
import { CID } from 'multiformats/cid'
import * as Raw from 'multiformats/codecs/raw'
import * as Digest from 'multiformats/hashes/digest'
Expand Down Expand Up @@ -164,6 +165,47 @@ export function calculate(data: Uint8Array): PieceCID {
return Link.create(Raw.code, digest)
}

/**
 * Compute the leaf count implied by the tree height stored in a PieceCID v2.
 * @param pieceCid - PieceCID (object, generic CID or string form) to inspect
 * @returns 2^height for the piece, or null when the input is not a valid
 *   PieceCID or its piece information cannot be read
 */
export function getLeafCount(pieceCid: PieceCID | CID | string): number | null {
  const parsed = asPieceCID(pieceCid)

  if (parsed) {
    try {
      // A tree of the encoded height has 2^height leaves
      const { height } = fromLink(parsed)
      return 2 ** height
    } catch {
      return null
    }
  }

  return null
}

/**
 * Extract the raw (original payload) size from a PieceCID v2.
 *
 * The height stored in the CID only describes the padded piece: a tree of
 * 2^height leaves of 32 bytes each. To recover the raw size the Fr32 expansion
 * has to be undone (127 payload bytes per 128 padded bytes) and the padding
 * that is also encoded in the CID has to be subtracted:
 *
 *   rawSize = (2^height * 32) * 127 / 128 - padding
 *
 * Review note carried over from the pull request: using height alone "only
 * gets us the padded size, we need to unpad using the padding that's also
 * encoded" (see #283).
 *
 * @param pieceCid - The PieceCID to extract raw size from
 * @returns The raw size in bytes, or null if the input is not a valid PieceCID
 *   or encodes a padding larger than the piece can hold
 */
export function getRawSize(pieceCid: PieceCID | CID | string): number | null {
  const validPieceCid = asPieceCID(pieceCid)
  if (!validPieceCid) {
    return null
  }

  try {
    const piece = fromLink(validPieceCid)

    // BigInt arithmetic keeps the intermediate products exact for tall trees
    const paddedSize = (1n << BigInt(piece.height)) * 32n
    // Undo Fr32 expansion: every 128 padded bytes carry 127 payload bytes
    const unpaddedCapacity = (paddedSize * 127n) / 128n
    // BigInt() accepts both number and bigint, whichever the library exposes
    const rawSize = unpaddedCapacity - BigInt(piece.padding)

    // A padding larger than the capacity means the CID is malformed
    if (rawSize < 0n) {
      return null
    }
    return Number(rawSize)
  } catch {
    return null
  }
}

/**
* Create a TransformStream that calculates PieceCID while streaming data through it
* This allows calculating PieceCID without buffering the entire data in memory
Expand Down
118 changes: 116 additions & 2 deletions src/storage/context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,18 @@
* ```
*/

import type { ethers } from 'ethers'
import { ethers } from 'ethers'
import { CID } from 'multiformats/cid'
import type { PaymentsService } from '../payments/index.ts'
import { PDPAuthHelper, PDPServer } from '../pdp/index.ts'
import { asPieceCID } from '../piece/index.ts'
import { PDPVerifier } from '../pdp/verifier.ts'
import { asPieceCID, getLeafCount, getRawSize } from '../piece/index.ts'
import { SPRegistryService } from '../sp-registry/index.ts'
import type { ProviderInfo } from '../sp-registry/types.ts'
import type { Synapse } from '../synapse.ts'
import type {
DataSetPieceData,
DataSetPieceDataWithLeafCount,
DownloadOptions,
EnhancedDataSetInfo,
MetadataEntry,
Expand Down Expand Up @@ -1260,6 +1264,116 @@ export class StorageContext {
return dataSetData.pieces.map((piece) => piece.pieceCid)
}

/**
 * Collect every piece yielded by getAllActivePiecesGenerator into an array and
 * attach a leaf count and raw size to each entry.
 *
 * Both values are derived locally from the PieceCID via getLeafCount and
 * getRawSize (no contract call is made here); when a helper returns null the
 * corresponding field falls back to 0.
 *
 * @param options - Passed through unchanged to getAllActivePiecesGenerator
 * @param options.batchSize - Batch size option forwarded to the generator
 * @param options.signal - AbortSignal forwarded to the generator
 * @returns Array of pieces including rawSize and leafCount
 */
async getPiecesWithDetails(options?: {
batchSize?: number
signal?: AbortSignal
}): Promise<DataSetPieceDataWithLeafCount[]> {
const pieces: DataSetPieceDataWithLeafCount[] = []

for await (const piece of this.getAllActivePiecesGenerator(options)) {
// TODO: should we call the contract for the leaf count instead, i.e. pdpVerifier.getPieceLeafCount(this._dataSetId, piece.pieceId)?
const leafCount = getLeafCount(piece.pieceCid) ?? 0
Comment on lines +1274 to +1275
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's ditch this

// TODO: is there a better way to get the raw size?
const rawSize = getRawSize(piece.pieceCid) ?? 0
Comment on lines +1274 to +1277
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is the only place where we're getting setting rawSize and leafCount now.. should we be calling the contract instead of these helper methods?

pieces.push({
pieceId: piece.pieceId,
pieceCid: piece.pieceCid,
rawSize,
leafCount,
// The piece's own CID is reused as the sub-piece CID
subPieceCid: piece.pieceCid,
subPieceOffset: 0, // TODO: placeholder value; figure out how to get the real sub piece offset
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how do I fulfill this value accurately?

} satisfies DataSetPieceDataWithLeafCount)
}

return pieces
}

/**
 * Eagerly collect every active piece of this data set into an array.
 * The pieces are produced by getAllActivePiecesGenerator, which reads them
 * from the PDPVerifier contract rather than from Curio.
 * @param options - Optional configuration object, forwarded to the generator
 * @param options.batchSize - The batch size for each pagination call (default: 100)
 * @param options.signal - Optional AbortSignal to cancel the operation
 * @returns Array of all active pieces with their details including PieceCID
 */
async getAllActivePieces(options?: { batchSize?: number; signal?: AbortSignal }): Promise<Array<DataSetPieceData>> {
  const collected: Array<DataSetPieceData> = []

  for await (const { pieceId, pieceCid } of this.getAllActivePiecesGenerator(options)) {
    const entry: DataSetPieceData = {
      pieceId,
      pieceCid,
      subPieceCid: pieceCid,
      subPieceOffset: 0, // TODO: figure out how to get the sub piece offset
    }
    collected.push(entry)
  }

  return collected
}

/**
* Get all active pieces for this data set as an async generator.
* This provides lazy evaluation and better memory efficiency for large data sets.
* Gets data directly from PDPVerifier contract (source of truth) rather than Curio.
*
* Pages are requested one at a time through PDPVerifier.getActivePieces; each
* returned piece is decoded into a PieceCID before it is yielded.
* @param options - Optional configuration object
* @param options.batchSize - The batch size for each pagination call (default: 100)
* @param options.signal - Optional AbortSignal to cancel the operation
* @yields Individual pieces with their details including PieceCID
* @throws When the signal is aborted (checked before each page and before each piece)
* @throws When a piece returned by the contract does not decode to a valid PieceCID
*/
async *getAllActivePiecesGenerator(options?: {
batchSize?: number
signal?: AbortSignal
}): AsyncGenerator<DataSetPieceData> {
// A fresh PDPVerifier wrapper is built on every invocation of the generator
const pdpVerifierAddress = this._warmStorageService.getPDPVerifierAddress()
const pdpVerifier = new PDPVerifier(this._synapse.getProvider(), pdpVerifierAddress)

const batchSize = options?.batchSize ?? 100
const signal = options?.signal
let offset = 0
let hasMore = true

while (hasMore) {
if (signal?.aborted) {
throw createError('StorageContext', 'getAllActivePiecesGenerator', 'Operation aborted')
}

const result = await pdpVerifier.getActivePieces(this._dataSetId, { offset, limit: batchSize, signal })

// Yield pieces one by one for lazy evaluation
for (let i = 0; i < result.pieces.length; i++) {
if (signal?.aborted) {
throw createError('StorageContext', 'getAllActivePiecesGenerator', 'Operation aborted')
}

// Parse the piece data as a PieceCID
// The contract stores the full PieceCID multihash digest (including height and padding)
// The data comes as a hex string from ethers, we need to decode it as bytes then as a CID
const pieceDataHex = result.pieces[i].data
const pieceDataBytes = ethers.getBytes(pieceDataHex)

const cid = CID.decode(pieceDataBytes)
const pieceCid = asPieceCID(cid)
if (!pieceCid) {
throw createError(
'StorageContext',
'getAllActivePiecesGenerator',
`Invalid PieceCID returned from contract for piece ${result.pieceIds[i]}`
)
}

// pieceIds is indexed in parallel with pieces
yield {
pieceId: result.pieceIds[i],
pieceCid,
subPieceCid: pieceCid,
subPieceOffset: 0, // TODO: figure out how to get the sub piece offset
} satisfies DataSetPieceData
Comment on lines +1348 to +1369
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: a core contributor should really eyeball this to make sure i'm doing things properly

}

// The contract-reported flag decides whether another page is requested;
// the offset advances by the requested batch size, not by the number of pieces received
hasMore = result.hasMore
offset += batchSize
}
}

/**
* Check if a piece exists on this service provider.
* @param pieceCid - The PieceCID (piece CID) to check
Expand Down
42 changes: 42 additions & 0 deletions src/test/pdp-verifier.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,48 @@ describe('PDPVerifier', () => {
})
})

describe('getActivePieces', () => {
  // An already-aborted signal must make the call reject with the abort message
  it('should handle AbortSignal', async () => {
    const abortController = new AbortController()
    abortController.abort()
    const { signal } = abortController

    try {
      await pdpVerifier.getActivePieces(123, { signal })
      assert.fail('Should have thrown an error')
    } catch (error: any) {
      assert.equal(error.message, 'Operation aborted')
    }
  })

  // With no options, the ABI-encoded page served by the mock provider is decoded as-is
  it('should be callable with default options', async () => {
    assert.isFunction(pdpVerifier.getActivePieces)

    const pieceData = '0x1234567890123456789012345678901234567890123456789012345678901234'

    // NOTE(review): the fixture carries one piece but three ids/sizes; the
    // length assertions below mirror exactly that
    const encodePage = (): string =>
      ethers.AbiCoder.defaultAbiCoder().encode(
        ['tuple(bytes data)[]', 'uint256[]', 'uint256[]', 'bool'],
        [[{ data: pieceData }], [1, 2, 3], [4, 5, 6], false]
      )

    mockProvider.call = async (tx: any) => {
      // 0x39f51544 is the getActivePieces selector; anything else gets a zero word
      const isGetActivePieces = tx.data?.startsWith('0x39f51544') === true
      if (!isGetActivePieces) {
        return `0x${'0'.repeat(64)}`
      }
      return encodePage()
    }

    const page = await pdpVerifier.getActivePieces(123)
    assert.equal(page.pieces.length, 1)
    assert.equal(page.pieceIds.length, 3)
    assert.equal(page.rawSizes.length, 3)
    assert.equal(page.hasMore, false)
    assert.equal(page.pieces[0].data, pieceData)
  })
})

describe('getContractAddress', () => {
it('should return the contract address', () => {
const address = pdpVerifier.getContractAddress()
Expand Down
63 changes: 62 additions & 1 deletion src/test/piecelink.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,15 @@ import type { API } from '@web3-storage/data-segment'
import { Size, toLink } from '@web3-storage/data-segment/piece'
import { assert } from 'chai'
import { CID } from 'multiformats/cid'
import { asLegacyPieceCID, asPieceCID, calculate, createPieceCIDStream, type PieceCID } from '../piece/index.ts'
import {
asLegacyPieceCID,
asPieceCID,
calculate,
createPieceCIDStream,
getLeafCount,
getRawSize,
type PieceCID,
} from '../piece/index.ts'

// https://github.com/filecoin-project/go-fil-commp-hashhash/blob/master/testdata/zero.txt
const zeroPieceCidFixture = `
Expand Down Expand Up @@ -246,4 +254,57 @@ describe('PieceCID utilities', () => {
// more complex async coordination, so we keep this test simple
})
})

// Tests for getLeafCount: one generated case per zero-piece fixture row, plus invalid inputs
describe('getLeafCount', () => {
zeroPieceCidFixture.forEach(([size, , v1]) => {
it(`should extract correct leaf count for size ${size}`, () => {
// Build a v2 PieceCID from the fixture's size and v1 CID
const v2 = toPieceCID(BigInt(size), v1)
const leafCount = getLeafCount(v2)

// Expected leaf count is 2^height where height is calculated from size
const expectedHeight = Size.Unpadded.toHeight(BigInt(size))
const expectedLeafCount = 2 ** expectedHeight

assert.isNotNull(leafCount)
assert.strictEqual(leafCount, expectedLeafCount)
Comment on lines +264 to +269
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this accurate?

})
})

it('should return null for invalid PieceCID', () => {
const result = getLeafCount(invalidCidString)
assert.isNull(result)
})

it('should return null for null input', () => {
// Cast through any to exercise the runtime guard with a non-typed input
const result = getLeafCount(null as any)
assert.isNull(result)
})
})

// Tests for getRawSize: one generated case per zero-piece fixture row, plus invalid inputs
describe('getRawSize', () => {
zeroPieceCidFixture.forEach(([size, , v1]) => {
it(`should extract correct raw size for size ${size}`, () => {
// Build a v2 PieceCID from the fixture's size and v1 CID
const v2 = toPieceCID(BigInt(size), v1)
const rawSize = getRawSize(v2)

// Expected raw size is leaf count * 32
// NOTE(review): this expectation is derived from the height only, i.e. it is
// 2^height * 32 and not the `size` the fixture CID was built from — confirm
// which of the two getRawSize is meant to return
const expectedHeight = Size.Unpadded.toHeight(BigInt(size))
const expectedLeafCount = 2 ** expectedHeight
const expectedRawSize = expectedLeafCount * 32
Comment on lines +290 to +293
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this accurate?


assert.isNotNull(rawSize)
assert.strictEqual(rawSize, expectedRawSize)
})
})

it('should return null for invalid PieceCID', () => {
const result = getRawSize(invalidCidString)
assert.isNull(result)
})

it('should return null for null input', () => {
// Cast through any to exercise the runtime guard with a non-typed input
const result = getRawSize(null as any)
assert.isNull(result)
})
})
})
Loading