diff --git a/packages/runtime/container-runtime/src/blobManager/blobManager.ts b/packages/runtime/container-runtime/src/blobManager/blobManager.ts index 512764564eb0..a386d009cefb 100644 --- a/packages/runtime/container-runtime/src/blobManager/blobManager.ts +++ b/packages/runtime/container-runtime/src/blobManager/blobManager.ts @@ -181,13 +181,12 @@ export class BlobManager { private readonly internalEvents = createEmitter(); /** - * Map of local IDs to storage IDs. Contains identity entries (storageId → storageId) for storage IDs. All requested IDs should - * be a key in this map. Blobs created while the container is detached are stored in IDetachedBlobStorage which - * gives local IDs; the storage IDs are filled in at attach time. - * Note: It contains mappings from all clients, i.e., from remote clients as well. local ID comes from the client - * that uploaded the blob but its mapping to storage ID is needed in all clients in order to retrieve the blob. + * Map of local IDs to storage IDs. Also includes identity mappings of storage ID to storage ID for all known + * storage IDs. All requested IDs must be a key in this map. Blobs created while the container is detached are + * stored in IDetachedBlobStorage which gives pseudo storage IDs; the real storage IDs are filled in at attach + * time via setRedirectTable(). */ - private readonly redirectTable: Map; + private readonly redirectTable: Map; /** * Blobs which we have not yet seen a BlobAttach op round-trip and not yet attached to a DDS. @@ -206,13 +205,13 @@ export class BlobManager { private readonly routeContext: IFluidHandleContext; private readonly storage: Pick; // Called when a blob node is requested. blobPath is the path of the blob's node in GC's graph. - // blobPath's format - `//`. + // blobPath's format - `//`. private readonly blobRequested: (blobPath: string) => void; // Called to check if a blob has been deleted by GC. - // blobPath's format - `//`. + // blobPath's format - `//`. private readonly isBlobDeleted: (blobPath: string) => boolean; private readonly runtime: IBlobManagerRuntime; - private readonly localBlobIdGenerator: () => string; + private readonly localIdGenerator: () => string; private readonly createBlobPayloadPending: boolean; @@ -233,14 +232,14 @@ export class BlobManager { */ sendBlobAttachOp: (localId: string, storageId: string) => void; // Called when a blob node is requested. blobPath is the path of the blob's node in GC's graph. - // blobPath's format - `//`. + // blobPath's format - `//`. readonly blobRequested: (blobPath: string) => void; // Called to check if a blob has been deleted by GC. - // blobPath's format - `//`. + // blobPath's format - `//`. readonly isBlobDeleted: (blobPath: string) => boolean; readonly runtime: IBlobManagerRuntime; stashedBlobs: IPendingBlobs | undefined; - readonly localBlobIdGenerator?: (() => string) | undefined; + readonly localIdGenerator?: (() => string) | undefined; readonly createBlobPayloadPending: boolean; }) { const { @@ -251,7 +250,7 @@ export class BlobManager { blobRequested, isBlobDeleted, runtime, - localBlobIdGenerator, + localIdGenerator, createBlobPayloadPending, } = props; this.routeContext = routeContext; @@ -259,7 +258,7 @@ export class BlobManager { this.blobRequested = blobRequested; this.isBlobDeleted = isBlobDeleted; this.runtime = runtime; - this.localBlobIdGenerator = localBlobIdGenerator ?? uuid; + this.localIdGenerator = localIdGenerator ?? uuid; this.createBlobPayloadPending = createBlobPayloadPending; this.mc = createChildMonitoringContext({ @@ -267,13 +266,9 @@ export class BlobManager { namespace: "BlobManager", }); - this.redirectTable = toRedirectTable( - blobManagerLoadInfo, - this.mc.logger, - this.runtime.attachState, - ); + this.redirectTable = toRedirectTable(blobManagerLoadInfo, this.mc.logger); - this.sendBlobAttachOp = (localId: string, blobId: string) => { + this.sendBlobAttachOp = (localId: string, storageId: string) => { const pendingEntry = this.pendingBlobs.get(localId); assert( pendingEntry !== undefined, @@ -302,7 +297,7 @@ export class BlobManager { } } pendingEntry.opsent = true; - sendBlobAttachOp(localId, blobId); + sendBlobAttachOp(localId, storageId); }; } @@ -329,59 +324,49 @@ export class BlobManager { }); } - public hasBlob(blobId: string): boolean { - return this.redirectTable.get(blobId) !== undefined; + public hasBlob(localId: string): boolean { + return this.redirectTable.get(localId) !== undefined; } /** * Retrieve the blob with the given local blob id. - * @param blobId - The local blob id. Likely coming from a handle. + * @param localId - The local blob id. Likely coming from a handle. * @param payloadPending - Whether we suspect the payload may be pending and not available yet. * @returns A promise which resolves to the blob contents */ - public async getBlob(blobId: string, payloadPending: boolean): Promise { + public async getBlob(localId: string, payloadPending: boolean): Promise { // Verify that the blob is not deleted, i.e., it has not been garbage collected. If it is, this will throw // an error, failing the call. - this.verifyBlobNotDeleted(blobId); + this.verifyBlobNotDeleted(localId); // Let runtime know that the corresponding GC node was requested. // Note that this will throw if the blob is inactive or tombstoned and throwing on incorrect usage // is configured. - this.blobRequested(getGCNodePathFromBlobId(blobId)); + this.blobRequested(getGCNodePathFromLocalId(localId)); - const pending = this.pendingBlobs.get(blobId); + const pending = this.pendingBlobs.get(localId); if (pending) { return pending.blob; } - let storageId: string; - if (this.runtime.attachState === AttachState.Detached) { - assert(this.redirectTable.has(blobId), 0x383 /* requesting unknown blobs */); - - // Blobs created while the container is detached are stored in IDetachedBlobStorage. - // The 'IContainerStorageService.readBlob()' call below will retrieve these via localId. - storageId = blobId; - } else { - const attachedStorageId = this.redirectTable.get(blobId); - if (!payloadPending) { - // Only blob handles explicitly marked with pending payload are permitted to exist without - // yet knowing their storage id. Otherwise they must already be associated with a storage id. - assert(attachedStorageId !== undefined, 0x11f /* "requesting unknown blobs" */); - } - // If we didn't find it in the redirectTable, assume the attach op is coming eventually and wait. - // We do this even if the local client doesn't have the blob payloadPending flag enabled, in case a - // remote client does have it enabled. This wait may be infinite if the uploading client failed - // the upload and doesn't exist anymore. - storageId = - attachedStorageId ?? - (await new Promise((resolve) => { - const onProcessBlobAttach = (localId: string, _storageId: string): void => { - if (localId === blobId) { - this.internalEvents.off("processedBlobAttach", onProcessBlobAttach); - resolve(_storageId); - } - }; - this.internalEvents.on("processedBlobAttach", onProcessBlobAttach); - })); + let storageId = this.redirectTable.get(localId); + if (storageId === undefined) { + // Only blob handles explicitly marked with pending payload are permitted to exist without + // yet knowing their storage id. Otherwise they must already be associated with a storage id. + // Handles for detached blobs are not payload pending. + assert(payloadPending, 0x11f /* "requesting unknown blobs" */); + // If we didn't find it in the redirectTable and it's payloadPending, assume the attach op is coming + // eventually and wait. We do this even if the local client doesn't have the blob payloadPending flag + // enabled, in case a remote client does have it enabled. This wait may be infinite if the uploading + // client failed the upload and doesn't exist anymore. + storageId = await new Promise((resolve) => { + const onProcessBlobAttach = (_localId: string, _storageId: string): void => { + if (_localId === localId) { + this.internalEvents.off("processedBlobAttach", onProcessBlobAttach); + resolve(_storageId); + } + }; + this.internalEvents.on("processedBlobAttach", onProcessBlobAttach); + }); } return PerformanceEvent.timedExecAsync( @@ -417,7 +402,7 @@ export class BlobManager { } : undefined; return new BlobHandle( - getGCNodePathFromBlobId(localId), + getGCNodePathFromLocalId(localId), this.routeContext, async () => this.getBlob(localId, false), false, // payloadPending @@ -428,11 +413,13 @@ export class BlobManager { private async createBlobDetached( blob: ArrayBufferLike, ): Promise> { + const localId = this.localIdGenerator(); // Blobs created while the container is detached are stored in IDetachedBlobStorage. - // The 'IContainerStorageService.createBlob()' call below will respond with a localId. - const response = await this.storage.createBlob(blob); - this.setRedirection(response.id, undefined); - return this.getBlobHandle(response.id); + // The 'IContainerStorageService.createBlob()' call below will respond with a pseudo storage ID. + // That pseudo storage ID will be replaced with the real storage ID at attach time. + const { id: detachedStorageId } = await this.storage.createBlob(blob); + this.setRedirection(localId, detachedStorageId); + return this.getBlobHandle(localId); } public async createBlob( @@ -467,7 +454,7 @@ export class BlobManager { // Create a local ID for the blob. After uploading it to storage and before returning it, a local ID to // storage ID mapping is created. - const localId = this.localBlobIdGenerator(); + const localId = this.localIdGenerator(); const pendingEntry: PendingBlob = { blob, handleP: new Deferred(), @@ -494,10 +481,10 @@ export class BlobManager { private createBlobWithPayloadPending( blob: ArrayBufferLike, ): IFluidHandleInternalPayloadPending { - const localId = this.localBlobIdGenerator(); + const localId = this.localIdGenerator(); const blobHandle = new BlobHandle( - getGCNodePathFromBlobId(localId), + getGCNodePathFromLocalId(localId), this.routeContext, async () => blob, true, // payloadPending @@ -581,7 +568,7 @@ export class BlobManager { * Set up a mapping in the redirect table from fromId to toId. Also, notify the runtime that a reference is added * which is required for GC. */ - private setRedirection(fromId: string, toId: string | undefined): void { + private setRedirection(fromId: string, toId: string): void { this.redirectTable.set(fromId, toId); } @@ -630,7 +617,7 @@ export class BlobManager { if (!entry.opsent) { this.sendBlobAttachOp(localId, response.id); } - const storageIds = getStorageIds(this.redirectTable, this.runtime.attachState); + const storageIds = getStorageIds(this.redirectTable); if (storageIds.has(response.id)) { // The blob is de-duped. Set up a local ID to storage ID mapping and return the blob. Since this is // an existing blob, we don't have to wait for the op to be ack'd since this step has already @@ -663,7 +650,7 @@ export class BlobManager { */ public reSubmit(metadata: Record | undefined): void { assert(isBlobMetadata(metadata), 0xc01 /* Expected blob metadata for a BlobAttach op */); - const { localId, blobId } = metadata; + const { localId, blobId: storageId } = metadata; // Any blob that we're actively trying to advance to attached state must have a // pendingBlobs entry. Decline to resubmit for anything else. // For example, we might be asked to resubmit stashed ops for blobs that never had @@ -671,7 +658,7 @@ export class BlobManager { // try to attach them since they won't be accessible to the customer and would just // be considered garbage immediately. if (this.pendingBlobs.has(localId)) { - this.sendBlobAttachOp(localId, blobId); + this.sendBlobAttachOp(localId, storageId); } } @@ -680,19 +667,19 @@ export class BlobManager { isBlobMetadata(message.metadata), 0xc02 /* Expected blob metadata for a BlobAttach op */, ); - const { localId, blobId } = message.metadata; + const { localId, blobId: storageId } = message.metadata; const pendingEntry = this.pendingBlobs.get(localId); if (pendingEntry?.abortSignal?.aborted) { this.deletePendingBlob(localId); return; } - this.setRedirection(localId, blobId); + this.setRedirection(localId, storageId); // set identity (id -> id) entry - this.setRedirection(blobId, blobId); + this.setRedirection(storageId, storageId); if (local) { - const waitingBlobs = this.opsInFlight.get(blobId); + const waitingBlobs = this.opsInFlight.get(storageId); if (waitingBlobs !== undefined) { // For each op corresponding to this storage ID that we are waiting for, resolve the pending blob. // This is safe because the server will keep the blob alive and the op containing the local ID to @@ -703,14 +690,14 @@ export class BlobManager { entry !== undefined, 0x38f /* local online BlobAttach op with no pending blob entry */, ); - this.setRedirection(pendingLocalId, blobId); + this.setRedirection(pendingLocalId, storageId); entry.acked = true; const blobHandle = this.getBlobHandle(pendingLocalId); blobHandle.notifyShared(); entry.handleP.resolve(blobHandle); this.deletePendingBlobMaybe(pendingLocalId); } - this.opsInFlight.delete(blobId); + this.opsInFlight.delete(storageId); } const localEntry = this.pendingBlobs.get(localId); if (localEntry) { @@ -721,11 +708,11 @@ export class BlobManager { this.deletePendingBlobMaybe(localId); } } - this.internalEvents.emit("processedBlobAttach", localId, blobId); + this.internalEvents.emit("processedBlobAttach", localId, storageId); } public summarize(telemetryContext?: ITelemetryContext): ISummaryTreeWithStats { - return summarizeBlobManagerState(this.redirectTable, this.runtime.attachState); + return summarizeBlobManagerState(this.redirectTable); } /** @@ -737,13 +724,12 @@ export class BlobManager { public getGCData(fullGC: boolean = false): IGarbageCollectionData { const gcData: IGarbageCollectionData = { gcNodes: {} }; for (const [localId, storageId] of this.redirectTable) { - assert(!!storageId, 0x390 /* Must be attached to get GC data */); - // Only return local ids as GC nodes because a blob can only be referenced via its local id. The storage - // id entries have the same key and value, ignore them. - // The outbound routes are empty because a blob node cannot reference other nodes. It can only be referenced - // by adding its handle to a referenced DDS. + // Don't report the identity mappings to GC - these exist to service old handles that referenced the storage + // IDs directly. We'll implicitly clean them up if all of their localId references get GC'd first. if (localId !== storageId) { - gcData.gcNodes[getGCNodePathFromBlobId(localId)] = []; + // The outbound routes are empty because a blob node cannot reference other nodes. It can only be referenced + // by adding its handle to a referenced DDS. + gcData.gcNodes[getGCNodePathFromLocalId(localId)] = []; } } return gcData; @@ -764,58 +750,55 @@ export class BlobManager { * Delete blobs with the given routes from the redirect table. * * @remarks - * The routes are GC nodes paths of format -`//`. The blob ids are all local ids. + * The routes are GC nodes paths of format -`//`. * Deleting the blobs involves 2 steps: * * 1. The redirect table entry for the local ids are deleted. * - * 2. If the storage ids corresponding to the deleted local ids are not in-use anymore, the redirect table entries - * for the storage ids are deleted as well. + * 2. If the storage ids corresponding to the deleted local ids are not referenced by any further local ids, the + * identity mappings in the redirect table are deleted as well. * * Note that this does not delete the blobs from storage service immediately. Deleting the blobs from redirect table - * will remove them the next summary. The service would them delete them some time in the future. + * will ensure we don't create an attachment blob for them at the next summary. The service would then delete them + * some time in the future. */ private deleteBlobsFromRedirectTable(blobRoutes: readonly string[]): void { - if (blobRoutes.length === 0) { - return; - } - - // This tracks the storage ids of local ids that are deleted. After the local ids have been deleted, if any of - // these storage ids are unused, they will be deleted as well. + // maybeUnusedStorageIds is used to compute the set of storage IDs that *used to have a local ID*, but that + // local ID is being deleted. const maybeUnusedStorageIds: Set = new Set(); for (const route of blobRoutes) { - const blobId = getBlobIdFromGCNodePath(route); + const localId = getLocalIdFromGCNodePath(route); // If the blob hasn't already been deleted, log an error because this should never happen. // If the blob has already been deleted, log a telemetry event. This can happen because multiple GC // sweep ops can contain the same data store. It would be interesting to track how often this happens. const alreadyDeleted = this.isBlobDeleted(route); - if (!this.redirectTable.has(blobId)) { + const storageId = this.redirectTable.get(localId); + if (storageId === undefined) { this.mc.logger.sendTelemetryEvent({ eventName: "DeletedAttachmentBlobNotFound", category: alreadyDeleted ? "generic" : "error", - blobId, + blobId: localId, details: { alreadyDeleted }, }); continue; } - const storageId = this.redirectTable.get(blobId); - assert(!!storageId, 0x5bb /* Must be attached to run GC */); maybeUnusedStorageIds.add(storageId); - this.redirectTable.delete(blobId); + this.redirectTable.delete(localId); } - // Find out storage ids that are in-use and remove them from maybeUnusedStorageIds. A storage id is in-use if - // the redirect table has a local id -> storage id entry for it. - for (const [localId, storageId] of this.redirectTable.entries()) { - assert(!!storageId, 0x5bc /* Must be attached to run GC */); - // For every storage id, the redirect table has a id -> id entry. These do not make the storage id in-use. - if (maybeUnusedStorageIds.has(storageId) && localId !== storageId) { + // Remove any storage IDs that still have local IDs referring to them (excluding the identity mapping). + for (const [localId, storageId] of this.redirectTable) { + if (localId !== storageId) { maybeUnusedStorageIds.delete(storageId); } } - // For unused storage ids, delete their id -> id entries from the redirect table. - // This way they'll be absent from the next summary, and the service is free to delete them from storage. + // Now delete any identity mappings (storage ID -> storage ID) from the redirect table that used to be + // referenced by a distinct local ID. This way they'll be absent from the next summary, and the service + // is free to delete them from storage. + // WARNING: This can potentially delete identity mappings that are still referenced, if storage deduping + // has let us add a local ID -> storage ID mapping that is later deleted. AB#47337 tracks this issue + // and possible solutions. for (const storageId of maybeUnusedStorageIds) { this.redirectTable.delete(storageId); } @@ -825,12 +808,12 @@ export class BlobManager { * Verifies that the blob with given id is not deleted, i.e., it has not been garbage collected. If the blob is GC'd, * log an error and throw if necessary. */ - private verifyBlobNotDeleted(blobId: string): void { - if (!this.isBlobDeleted(getGCNodePathFromBlobId(blobId))) { + private verifyBlobNotDeleted(localId: string): void { + if (!this.isBlobDeleted(getGCNodePathFromLocalId(localId))) { return; } - const request = { url: blobId }; + const request = { url: localId }; const error = responseToException( createResponseError(404, `Blob was deleted`, request), request, @@ -846,20 +829,34 @@ export class BlobManager { throw error; } - public setRedirectTable(table: Map): void { + /** + * Called in detached state just prior to attaching, this will update the redirect table by + * converting the pseudo storage IDs into real storage IDs using the provided detachedStorageTable. + * The provided table must have exactly the same set of pseudo storage IDs as are found in the redirect table. + * @param detachedStorageTable - A map of pseudo storage IDs to real storage IDs. + */ + public patchRedirectTable(detachedStorageTable: Map): void { assert( this.runtime.attachState === AttachState.Detached, 0x252 /* "redirect table can only be set in detached container" */, ); + // The values of the redirect table are the pseudo storage IDs, which are the keys of the + // detachedStorageTable. We expect to have a many:1 mapping from local IDs to pseudo + // storage IDs (many in the case that the storage dedupes the blob). assert( - this.redirectTable.size === table.size, + new Set(this.redirectTable.values()).size === detachedStorageTable.size, 0x391 /* Redirect table size must match BlobManager's local ID count */, ); - for (const [localId, storageId] of table) { - assert(this.redirectTable.has(localId), 0x254 /* "unrecognized id in redirect table" */); - this.setRedirection(localId, storageId); + // Taking a snapshot of the redirect table entries before iterating, because + // we will be adding identity mappings to the the redirect table as we iterate + // and we don't want to include those in the iteration. + const redirectTableEntries = [...this.redirectTable.entries()]; + for (const [localId, detachedStorageId] of redirectTableEntries) { + const newStorageId = detachedStorageTable.get(detachedStorageId); + assert(newStorageId !== undefined, "Couldn't find a matching storage ID"); + this.setRedirection(localId, newStorageId); // set identity (id -> id) entry - this.setRedirection(storageId, storageId); + this.setRedirection(newStorageId, newStorageId); } } @@ -896,17 +893,17 @@ export class BlobManager { } /** - * For a blobId, returns its path in GC's graph. The node path is of the format `//`. + * For a localId, returns its path in GC's graph. The node path is of the format `//`. * This path must match the path of the blob handle returned by the createBlob API because blobs are marked * referenced by storing these handles in a referenced DDS. */ -const getGCNodePathFromBlobId = (blobId: string): string => - `/${blobManagerBasePath}/${blobId}`; +const getGCNodePathFromLocalId = (localId: string): string => + `/${blobManagerBasePath}/${localId}`; /** - * For a given GC node path, return the blobId. The node path is of the format `//`. + * For a given GC node path, return the localId. The node path is of the format `//`. */ -const getBlobIdFromGCNodePath = (nodePath: string): string => { +const getLocalIdFromGCNodePath = (nodePath: string): string => { const pathParts = nodePath.split("/"); assert(areBlobPathParts(pathParts), 0x5bd /* Invalid blob node path */); return pathParts[2]; diff --git a/packages/runtime/container-runtime/src/blobManager/blobManagerSnapSum.ts b/packages/runtime/container-runtime/src/blobManager/blobManagerSnapSum.ts index c8867b818a68..e19138cad851 100644 --- a/packages/runtime/container-runtime/src/blobManager/blobManagerSnapSum.ts +++ b/packages/runtime/container-runtime/src/blobManager/blobManagerSnapSum.ts @@ -3,11 +3,7 @@ * Licensed under the MIT License. */ -import { - AttachState, - type IContainerContext, -} from "@fluidframework/container-definitions/internal"; -import { assert } from "@fluidframework/core-utils/internal"; +import type { IContainerContext } from "@fluidframework/container-definitions/internal"; import { readAndParse } from "@fluidframework/driver-utils/internal"; import type { ISummaryTreeWithStats } from "@fluidframework/runtime-definitions/internal"; import { SummaryTreeBuilder } from "@fluidframework/runtime-utils/internal"; @@ -60,72 +56,54 @@ const loadV1 = async ( export const toRedirectTable = ( blobManagerLoadInfo: IBlobManagerLoadInfo, logger: ITelemetryLoggerExt, - attachState: AttachState, -): Map => { +): Map => { logger.sendTelemetryEvent({ eventName: "AttachmentBlobsLoaded", count: blobManagerLoadInfo.ids?.length ?? 0, redirectTable: blobManagerLoadInfo.redirectTable?.length, }); - const redirectTable = new Map(blobManagerLoadInfo.redirectTable); - const detached = attachState !== AttachState.Attached; - if (blobManagerLoadInfo.ids) { - // If we are detached, we don't have storage IDs yet, so set to undefined - // Otherwise, set identity (id -> id) entries. - for (const entry of blobManagerLoadInfo.ids) { - redirectTable.set(entry, detached ? undefined : entry); + const redirectTable = new Map(blobManagerLoadInfo.redirectTable); + if (blobManagerLoadInfo.ids !== undefined) { + for (const storageId of blobManagerLoadInfo.ids) { + // Older versions of the runtime used the storage ID directly in the handle, + // rather than routing through the redirectTable. To support old handles that + // were created in this way but unify handling through the redirectTable, we + // add identity mappings to the redirect table at load. These identity entries + // will be excluded during summarization. + redirectTable.set(storageId, storageId); } } return redirectTable; }; export const summarizeBlobManagerState = ( - redirectTable: Map, - attachState: AttachState, -): ISummaryTreeWithStats => summarizeV1(redirectTable, attachState); + redirectTable: Map, +): ISummaryTreeWithStats => summarizeV1(redirectTable); -const summarizeV1 = ( - redirectTable: Map, - attachState: AttachState, -): ISummaryTreeWithStats => { - const storageIds = getStorageIds(redirectTable, attachState); - - // if storageIds is empty, it means we are detached and have only local IDs, or that there are no blobs attached - const blobIds = storageIds.size > 0 ? [...storageIds] : [...redirectTable.keys()]; +const summarizeV1 = (redirectTable: Map): ISummaryTreeWithStats => { const builder = new SummaryTreeBuilder(); - for (const blobId of blobIds) { - builder.addAttachment(blobId); + const storageIds = getStorageIds(redirectTable); + for (const storageId of storageIds) { + // The Attachment is inspectable by storage, which lets it detect that the blob is referenced + // and therefore should not be GC'd. + builder.addAttachment(storageId); } - // Any non-identity entries in the table need to be saved in the summary - if (redirectTable.size > blobIds.length) { - builder.addBlob( - redirectTableBlobName, - // filter out identity entries - JSON.stringify( - [...redirectTable.entries()].filter(([localId, storageId]) => localId !== storageId), - ), - ); + // Exclude identity mappings from the redirectTable summary. Note that + // the storageIds of the identity mappings are still included in the Attachments + // above, so we expect these identity mappings will be recreated at load + // time in toRedirectTable even if there is no non-identity mapping in + // the redirectTable. + const nonIdentityRedirectTableEntries = [...redirectTable.entries()].filter( + ([localId, storageId]) => localId !== storageId, + ); + if (nonIdentityRedirectTableEntries.length > 0) { + builder.addBlob(redirectTableBlobName, JSON.stringify(nonIdentityRedirectTableEntries)); } return builder.getSummaryTree(); }; -export const getStorageIds = ( - redirectTable: Map, - attachState: AttachState, -): Set => { - const ids = new Set(redirectTable.values()); - - // If we are detached, we will not have storage IDs, only undefined - const undefinedValueInTable = ids.delete(undefined); - - // For a detached container, entries are inserted into the redirect table with an undefined storage ID. - // For an attached container, entries are inserted w/storage ID after the BlobAttach op round-trips. - assert( - !undefinedValueInTable || (attachState === AttachState.Detached && ids.size === 0), - 0x382 /* 'redirectTable' must contain only undefined while detached / defined values while attached */, - ); - - return ids as Set; +export const getStorageIds = (redirectTable: Map): Set => { + return new Set(redirectTable.values()); }; diff --git a/packages/runtime/container-runtime/src/containerRuntime.ts b/packages/runtime/container-runtime/src/containerRuntime.ts index b7fe3fb6b18e..463d4dff0b4e 100644 --- a/packages/runtime/container-runtime/src/containerRuntime.ts +++ b/packages/runtime/container-runtime/src/containerRuntime.ts @@ -3708,7 +3708,7 @@ export class ContainerRuntime telemetryContext?: ITelemetryContext, ): ISummaryTree { if (blobRedirectTable) { - this.blobManager.setRedirectTable(blobRedirectTable); + this.blobManager.patchRedirectTable(blobRedirectTable); } // We can finalize any allocated IDs since we're the only client diff --git a/packages/runtime/container-runtime/src/test/blobHandles.spec.ts b/packages/runtime/container-runtime/src/test/blobHandles.spec.ts index c039c2252c0e..a78e6784ce9e 100644 --- a/packages/runtime/container-runtime/src/test/blobHandles.spec.ts +++ b/packages/runtime/container-runtime/src/test/blobHandles.spec.ts @@ -78,7 +78,7 @@ describe("BlobHandles", () => { d.resolve(); }, stashedBlobs: {}, - localBlobIdGenerator: () => "localId", + localIdGenerator: () => "localId", isBlobDeleted: () => false, storage: failProxy({ createBlob: async () => { @@ -118,7 +118,7 @@ describe("BlobHandles", () => { d.resolve(); }, stashedBlobs: {}, - localBlobIdGenerator: () => "localId", + localIdGenerator: () => "localId", storage: failProxy({ createBlob: async () => { count++; diff --git a/packages/runtime/container-runtime/src/test/blobManager.spec.ts b/packages/runtime/container-runtime/src/test/blobManager.spec.ts index 8142c046cce5..679a0d243f1b 100644 --- a/packages/runtime/container-runtime/src/test/blobManager.spec.ts +++ b/packages/runtime/container-runtime/src/test/blobManager.spec.ts @@ -250,7 +250,7 @@ export class MockRuntime table.set(detachedId, id); } this.detachedStorage.blobs.clear(); - this.blobManager.setRedirectTable(table); + this.blobManager.patchRedirectTable(table); } const summary = validateSummary(this); this.attachState = AttachState.Attached; @@ -460,7 +460,7 @@ for (const createBlobPayloadPending of [false, true]) { const summaryData = validateSummary(runtime); assert.strictEqual(summaryData.ids.length, 1); - assert.strictEqual(summaryData.redirectTable, undefined); + assert.strictEqual(summaryData.redirectTable?.length, 1); }); it("detached->attached snapshot", async () => { @@ -704,7 +704,7 @@ for (const createBlobPayloadPending of [false, true]) { const summaryData = validateSummary(runtime); assert.strictEqual(summaryData.ids.length, 1); - assert.strictEqual(summaryData.redirectTable, undefined); + assert.strictEqual(summaryData.redirectTable?.length, 2); }); it("handles deduped IDs in detached->attached", async () => { @@ -729,7 +729,7 @@ for (const createBlobPayloadPending of [false, true]) { const summaryData = validateSummary(runtime); assert.strictEqual(summaryData.ids.length, 1); - assert.strictEqual(summaryData.redirectTable?.length, 4); + assert.strictEqual(summaryData.redirectTable?.length, 6); }); it("can load from summary", async () => { @@ -756,6 +756,45 @@ for (const createBlobPayloadPending of [false, true]) { assert.strictEqual(summaryData2.redirectTable?.length, 3); }); + it("can get blobs by requesting their storage ID", async () => { + await runtime.attach(); + await runtime.connect(); + + const handle = runtime.createBlob(IsoBuffer.from("blob", "utf8")); + await runtime.processAll(); + + await assert.doesNotReject(handle); + + // Using the summary as a simple way to grab the storage ID of the blob we just created + const { + redirectTable, + ids: [storageId], + } = validateSummary(runtime); + assert.strictEqual(redirectTable?.length, 1); + assert.strictEqual(typeof storageId, "string", "Expect storage ID to be in the summary"); + + const blob = await runtime.blobManager.getBlob(storageId, createBlobPayloadPending); + + const runtime2 = new MockRuntime( + mc, + createBlobPayloadPending, + // Loading a second runtime with just the blob attachments and no redirect table + // lets us verify that we still correctly reconstruct the identity mapping during load. + { ids: [storageId] }, + true, + ); + (runtime2.storage as unknown as BaseMockBlobStorage).blobs.set(storageId, blob); + await assert.doesNotReject( + runtime2.blobManager.getBlob(storageId, createBlobPayloadPending), + ); + const { + redirectTable: redirectTable2, + ids: [storageId2], + } = validateSummary(runtime2); + assert.strictEqual(redirectTable2, undefined); + assert.strictEqual(storageId2, storageId, "Expect storage ID to be in the summary"); + }); + it("handles duplicate remote upload", async () => { await runtime.attach(); await runtime.connect(); @@ -831,9 +870,10 @@ for (const createBlobPayloadPending of [false, true]) { }); it("runtime disposed during readBlob - log no error", async () => { - const someId = "someId"; + const someLocalId = "someLocalId"; + const someStorageId = "someStorageId"; // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-call -- Accessing private property - (runtime.blobManager as any).setRedirection(someId, undefined); // To appease an assert + (runtime.blobManager as any).setRedirection(someLocalId, someStorageId); // To appease an assert // Mock storage.readBlob to dispose the runtime and throw an error Sinon.stub(runtime.storage, "readBlob").callsFake(async (_id: string) => { @@ -842,7 +882,7 @@ for (const createBlobPayloadPending of [false, true]) { }); await assert.rejects( - async () => runtime.blobManager.getBlob(someId, false), + async () => runtime.blobManager.getBlob(someLocalId, false), (e: Error) => e.message === "BOOM!", "Expected getBlob to throw with test error message", ); @@ -1107,7 +1147,7 @@ for (const createBlobPayloadPending of [false, true]) { }); describe("Garbage Collection", () => { - let redirectTable: Map; + let redirectTable: Map; /** * Creates a blob with the given content and returns its local and storage id. diff --git a/packages/test/test-end-to-end-tests/src/test/blobs.spec.ts b/packages/test/test-end-to-end-tests/src/test/blobs.spec.ts index 320e737e4ee2..e2cd0bc4f9e0 100644 --- a/packages/test/test-end-to-end-tests/src/test/blobs.spec.ts +++ b/packages/test/test-end-to-end-tests/src/test/blobs.spec.ts @@ -396,7 +396,7 @@ for (const createBlobPayloadPending of [undefined, true] as const) { const container2 = await provider.loadTestContainer(testContainerConfig); const snapshot2 = (container2 as any).runtime.blobManager.summarize(); assert.strictEqual(snapshot2.stats.treeNodeCount, 1); - assert.strictEqual(snapshot1.summary.tree[0].id, snapshot2.summary.tree[0].id); + assert.deepStrictEqual(snapshot1.summary.tree, snapshot2.summary.tree); }); // regression test for https://github.com/microsoft/FluidFramework/issues/9702 diff --git a/packages/test/test-end-to-end-tests/src/test/gc/gcSweepAttachmentBlobs.spec.ts b/packages/test/test-end-to-end-tests/src/test/gc/gcSweepAttachmentBlobs.spec.ts index b49ce5171932..dd038d07cdcf 100644 --- a/packages/test/test-end-to-end-tests/src/test/gc/gcSweepAttachmentBlobs.spec.ts +++ b/packages/test/test-end-to-end-tests/src/test/gc/gcSweepAttachmentBlobs.spec.ts @@ -67,7 +67,7 @@ function validateBlobStateInSummary( expectDelete: boolean, expectGCStateHandle: boolean, ) { - const shouldShouldNot = expectDelete ? "should" : "should not"; + const shouldShouldNot = expectDelete ? "should not" : "should"; // Validate that the blob tree should not be in the summary since there should be no attachment blobs. const blobsTree = summaryTree.tree[blobsTreeName] as ISummaryTree;