Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ export interface IConnectionDetails {
export interface IContainer extends IEventProvider<IContainerEvents> {
attach(request: IRequest, attachProps?: {
deltaConnection?: "none" | "delayed";
maxCreateRetries?: number;
}): Promise<void>;
readonly attachState: AttachState;
readonly audience: IAudience;
Expand Down
11 changes: 10 additions & 1 deletion packages/common/container-definitions/src/loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,16 @@ export interface IContainer extends IEventProvider<IContainerEvents> {
*/
attach(
request: IRequest,
attachProps?: { deltaConnection?: "none" | "delayed" },
attachProps?: {
deltaConnection?: "none" | "delayed";
/**
* Maximum number of retries when creating a new container fails with a retriable error.
*
* - The `Fluid.Container.CreateMaxRetries` feature flag takes precedence over this option.
* - If neither is set, retries will continue indefinitely (default behavior).
*/
maxCreateRetries?: number;
},
): Promise<void>;

/**
Expand Down
15 changes: 13 additions & 2 deletions packages/loader/container-loader/src/container.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1214,7 +1214,10 @@ export class Container
public readonly attach = runSingle(
async (
request: IRequest,
attachProps?: { deltaConnection?: "none" | "delayed" },
attachProps?: {
deltaConnection?: "none" | "delayed";
maxCreateRetries?: number;
},
): Promise<void> => {
await PerformanceEvent.timedExecAsync(
this.mc.logger,
Expand Down Expand Up @@ -1286,6 +1289,7 @@ export class Container
this.service = await this.createDocumentService(createNewResolvedUrl, {
mode: "attach",
summary,
maxCreateRetries: attachProps?.maxCreateRetries,
});
}
this.storageAdapter.connectToService(this.service);
Expand Down Expand Up @@ -1533,7 +1537,9 @@ export class Container
*/
private async createDocumentService(
resolvedUrl: IResolvedUrl,
props: { mode: "load" } | { mode: "attach"; summary: ISummaryTree | undefined },
props:
| { mode: "load" }
| { mode: "attach"; summary: ISummaryTree | undefined; maxCreateRetries?: number },
): Promise<IDocumentService> {
let service: IDocumentService;
if (props.mode === "load") {
Expand All @@ -1547,6 +1553,10 @@ export class Container
service.on("metadataUpdate", this.metadataUpdateHandler);
}
} else {
// Feature flag takes precedence over the attach parameter.
const maxRetries =
this.mc.config.getNumber("Fluid.Container.CreateMaxRetries") ?? props.maxCreateRetries;

service = await runWithRetry(
async () =>
this.serviceFactory.createContainer(
Expand All @@ -1559,6 +1569,7 @@ export class Container
this.mc.logger,
{
cancel: this._deltaManager.closeAbortController.signal,
maxRetries,
}, // progress
);
}
Expand Down
36 changes: 36 additions & 0 deletions packages/loader/driver-utils/src/runWithRetry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { delay } from "@fluidframework/core-utils/internal";
import { DriverErrorTypes } from "@fluidframework/driver-definitions/internal";
import {
isFluidError,
wrapError,
type ITelemetryLoggerExt,
} from "@fluidframework/telemetry-utils/internal";

Expand Down Expand Up @@ -45,6 +46,12 @@ export interface IProgress {
* @param error - error object returned from the call.
*/
onRetry?(delayInMs: number, error: unknown): void;

/**
* Maximum number of retries before giving up on a retriable error.
* The initial attempt is not counted, so `0` means the call is made once and never retried.
* If undefined, retries will continue indefinitely (default behavior).
*/
maxRetries?: number;
}

/**
Expand Down Expand Up @@ -122,6 +129,35 @@ export async function runWithRetry<T>(
}

numRetries++;

// Check if max retries limit has been reached
if (progress.maxRetries !== undefined && numRetries > progress.maxRetries) {
logger.sendTelemetryEvent(
Comment on lines +128 to +130
Copy link

Copilot AI Mar 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

progress.maxRetries is used without validation. If a caller passes NaN, Infinity, a negative value, or a non-integer, the comparison numRetries > progress.maxRetries can behave unexpectedly (e.g., NaN makes the limit never trigger, reintroducing infinite retries). Consider normalizing the option (e.g., require a finite, non-negative integer; otherwise treat as undefined or throw a non-retriable UsageError).

Copilot uses AI. Check for mistakes.
{
eventName: `${fetchCallName}_maxRetriesExceeded`,
retry: numRetries - 1,
maxRetries: progress.maxRetries,
duration: performanceNow() - startTime,
fetchCallName,
},
error,
);
// Wrap the original error to preserve its details while marking it non-retriable
throw wrapError(
error,
(message) =>
new NonRetryableError(
`runWithRetry failed after max retries: ${message}`,
DriverErrorTypes.genericError,
{
driverVersion: pkgVersion,
fetchCallName,
maxRetries: progress.maxRetries,
},
),
);
}

lastError = error;
// Wait for the calculated time before retrying.
retryAfterMs = calculateMaxWaitTime(retryAfterMs, error);
Expand Down
108 changes: 108 additions & 0 deletions packages/loader/driver-utils/src/test/runWithRetry.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -221,4 +221,112 @@ describe("runWithRetry Tests", () => {
assert.strictEqual((error as any).reason, "Sample abort reason");
}
});

// A call that always fails with a retriable error must be attempted once plus
// `maxRetries` more times, then reject with a wrapped error whose `cause` is the
// original failure.
it("Should stop retrying after maxRetries is exceeded", async () => {
	const retryLimit = 3;
	let callCount = 0;
	const alwaysFails = async (): Promise<boolean> => {
		callCount++;
		// Mark the failure as retriable so runWithRetry keeps trying.
		throw Object.assign(new Error("Throw error"), { canRetry: true });
	};

	try {
		await runWithFastSetTimeout(async () =>
			runWithRetry(alwaysFails, "test", logger, { maxRetries: retryLimit }),
		);
		assert.fail("Should not succeed");
	} catch (error) {
		const thrown = error as { message?: unknown; cause?: { message?: unknown } };
		// The wrapper message is prefixed onto the original error message.
		assert.strictEqual(thrown.message, "runWithRetry failed after max retries: Throw error");
		// The original error stays reachable through `cause`.
		assert.strictEqual(thrown.cause?.message, "Throw error");
	}

	// One initial call followed by `retryLimit` retries.
	assert.strictEqual(callCount, retryLimit + 1, "Should retry exactly maxRetries times");
});

// When the call recovers before the retry budget is used up, runWithRetry must
// return the successful result and stop calling.
it("Should succeed before maxRetries is exceeded", async () => {
	const retryLimit = 5;
	const failuresBeforeSuccess = 2;
	let callCount = 0;
	const failsTwiceThenSucceeds = async (): Promise<boolean> => {
		callCount++;
		if (callCount > failuresBeforeSuccess) {
			// Third attempt onwards: report success.
			return true;
		}
		// First two attempts: fail with a retriable error.
		throw Object.assign(new Error("Throw error"), { canRetry: true });
	};

	const result = await runWithFastSetTimeout(async () =>
		runWithRetry(failsTwiceThenSucceeds, "test", logger, { maxRetries: retryLimit }),
	);

	assert.strictEqual(result, true, "Should succeed");
	assert.strictEqual(callCount, 3, "Should take 3 attempts to succeed");
});

// With no `maxRetries` in the progress options there is no retry cap: the call
// is repeated through ten retriable failures until it finally succeeds.
it("Should retry infinitely when maxRetries is undefined", async () => {
	const failureCount = 10;
	let callCount = 0;
	const failsManyTimesThenSucceeds = async (): Promise<boolean> => {
		callCount++;
		if (callCount > failureCount) {
			return true;
		}
		// Still inside the failing window: throw a retriable error.
		throw Object.assign(new Error("Throw error"), { canRetry: true });
	};

	const result = await runWithFastSetTimeout(async () =>
		runWithRetry(failsManyTimesThenSucceeds, "test", logger, {}),
	);

	assert.strictEqual(result, true, "Should succeed");
	assert.strictEqual(callCount, failureCount + 1, "Should retry until success");
});

// `maxRetries: 0` leaves no retry budget: the first retriable failure must be
// surfaced right away as a wrapped error, after exactly one call.
it("Should fail immediately with maxRetries set to 0", async () => {
	let callCount = 0;
	const alwaysFails = async (): Promise<boolean> => {
		callCount++;
		// Retriable on its own, but the zero budget should stop any retry.
		throw Object.assign(new Error("Throw error"), { canRetry: true });
	};

	try {
		await runWithFastSetTimeout(async () =>
			runWithRetry(alwaysFails, "test", logger, { maxRetries: 0 }),
		);
		assert.fail("Should not succeed");
	} catch (error) {
		const thrown = error as { message?: unknown; cause?: { message?: unknown } };
		// The wrapper message is prefixed onto the original error message.
		assert.strictEqual(thrown.message, "runWithRetry failed after max retries: Throw error");
		// The original error stays reachable through `cause`.
		assert.strictEqual(thrown.cause?.message, "Throw error");
	}

	// Only the initial call happened.
	assert.strictEqual(callCount, 1, "Should not retry at all");
});
});
Loading