1- import { error } from "@opennextjs/aws/adapters/logger.js" ;
1+ import { debug , error } from "@opennextjs/aws/adapters/logger.js" ;
22import type { QueueMessage } from "@opennextjs/aws/types/overrides" ;
33import {
44 FatalError ,
@@ -8,11 +8,15 @@ import {
88} from "@opennextjs/aws/utils/error.js" ;
99import { DurableObject } from "cloudflare:workers" ;
1010
/** Default cap on the number of revalidations a single durable object runs concurrently. */
const DEFAULT_MAX_REVALIDATION_BY_DURABLE_OBJECT = 5;
/** Default timeout, in milliseconds, applied to a single revalidation request. */
const DEFAULT_REVALIDATION_TIMEOUT_MS = 10_000;
/** Default base interval, in milliseconds, used when scheduling retries of failed revalidations. */
const DEFAULT_REVALIDATION_RETRY_INTERVAL_MS = 2_000;
/** Default retry count after which a failing revalidation is abandoned. */
const DEFAULT_MAX_REVALIDATION_ATTEMPTS = 6;
1315
/** Retry bookkeeping kept for a route whose revalidation failed. */
interface FailedState {
  /** The queue message that is replayed when the revalidation is retried. */
  msg: QueueMessage;
  /** Number of times this message has been recorded as failed so far. */
  retryCount: number;
  /** Epoch timestamp, in milliseconds, at which the next retry is due. */
  nextAlarmMs: number;
}
1721
1822export class DurableObjectQueueHandler extends DurableObject < CloudflareEnv > {
@@ -21,37 +25,73 @@ export class DurableObjectQueueHandler extends DurableObject<CloudflareEnv> {
2125 // TODO: handle incremental cache with only eventual consistency (i.e. KV or R2/D1 with the optional cache layer on top)
2226 ongoingRevalidations = new Map < string , Promise < void > > ( ) ;
2327
24- // TODO: restore the state of the failed revalidations - Probably in the next PR where i'll add the storage
25- routeInFailedState = new Map <
26- string ,
27- { msg : ExtendedQueueMessage ; retryCount : number ; nextAlarmMs : number }
28- > ( ) ;
28+ sql : SqlStorage ;
29+
30+ routeInFailedState = new Map < string , FailedState > ( ) ;
2931
3032 service : NonNullable < CloudflareEnv [ "NEXT_CACHE_REVALIDATION_WORKER" ] > ;
3133
32- // TODO: allow this to be configurable - How do we want todo that? env variable? passed down from the queue override ?
33- maxRevalidations = MAX_REVALIDATION_BY_DURABLE_OBJECT ;
34+ // Configurable params
35+ readonly maxRevalidations : number ;
36+ readonly revalidationTimeout : number ;
37+ readonly revalidationRetryInterval : number ;
38+ readonly maxRevalidationAttempts : number ;
3439
/**
 * Wires the durable object to its revalidation service binding and SQL storage,
 * resolves the tunable parameters from the environment, then restores the
 * persisted failed state.
 *
 * @param ctx Durable object state, used for storage and concurrency control.
 * @param env Worker environment holding the service binding and optional overrides.
 * @throws IgnorableError when the cache revalidation service binding is missing.
 */
constructor(ctx: DurableObjectState, env: CloudflareEnv) {
  super(ctx, env);
  this.service = env.NEXT_CACHE_REVALIDATION_WORKER!;
  // If there is no service binding, we throw an error because we can't revalidate without it
  if (!this.service) throw new IgnorableError("No service binding for cache revalidation worker");
  this.sql = ctx.storage.sql;

  // Environment overrides are free-form strings: anything that is not a strictly
  // positive integer falls back to the default. A NaN limit would silently disable
  // the concurrency cap, and a limit of 0 would block forever on an empty race.
  const readPositiveInt = (raw: string | undefined, fallback: number): number => {
    const parsed = Number.parseInt(raw ?? "", 10);
    return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
  };

  // The configuration is resolved before the state restoration is started,
  // because restoring the alarm reads the retry interval.
  this.maxRevalidations = readPositiveInt(
    env.MAX_REVALIDATION_BY_DURABLE_OBJECT,
    DEFAULT_MAX_REVALIDATION_BY_DURABLE_OBJECT
  );
  this.revalidationTimeout = readPositiveInt(env.REVALIDATION_TIMEOUT_MS, DEFAULT_REVALIDATION_TIMEOUT_MS);
  this.revalidationRetryInterval = readPositiveInt(
    env.REVALIDATION_RETRY_INTERVAL_MS,
    DEFAULT_REVALIDATION_RETRY_INTERVAL_MS
  );
  this.maxRevalidationAttempts = readPositiveInt(
    env.MAX_REVALIDATION_ATTEMPTS,
    DEFAULT_MAX_REVALIDATION_ATTEMPTS
  );

  // We restore the state; a constructor cannot await, so the promise is intentionally not awaited here
  void ctx.blockConcurrencyWhile(async () => {
    debug(`Restoring the state of the durable object`);
    await this.initState();
  });

  debug(`Durable object initialized`);
}
4171
42- async revalidate ( msg : ExtendedQueueMessage ) {
72+ async revalidate ( msg : QueueMessage ) {
4373 // If there is already an ongoing revalidation, we don't need to revalidate again
4474 if ( this . ongoingRevalidations . has ( msg . MessageDeduplicationId ) ) return ;
4575
4676 // The route is already in a failed state, it will be retried later
4777 if ( this . routeInFailedState . has ( msg . MessageDeduplicationId ) ) return ;
4878
49- if ( this . ongoingRevalidations . size >= MAX_REVALIDATION_BY_DURABLE_OBJECT ) {
79+ // If the last success is newer than the last modified, it's likely that the regional cache is out of date
80+ // We don't need to revalidate in this case
81+ if ( this . checkSyncTable ( msg ) ) return ;
82+
83+ if ( this . ongoingRevalidations . size >= this . maxRevalidations ) {
84+ debug (
85+ `The maximum number of revalidations (${ this . maxRevalidations } ) is reached. Blocking until one of the revalidations finishes.`
86+ ) ;
5087 const ongoingRevalidations = this . ongoingRevalidations . values ( ) ;
5188 // When there is more than the max revalidations, we block concurrency until one of the revalidations finishes
5289 // We still await the promise to ensure the revalidation is completed
5390 // This is fine because the queue itself run inside a waitUntil
54- await this . ctx . blockConcurrencyWhile ( ( ) => Promise . race ( ongoingRevalidations ) ) ;
91+ await this . ctx . blockConcurrencyWhile ( async ( ) => {
92+ debug ( `Waiting for one of the revalidations to finish` ) ;
93+ await Promise . race ( ongoingRevalidations ) ;
94+ } ) ;
5595 }
5696
5797 const revalidationPromise = this . executeRevalidation ( msg ) ;
@@ -63,31 +103,33 @@ export class DurableObjectQueueHandler extends DurableObject<CloudflareEnv> {
63103 this . ctx . waitUntil ( revalidationPromise ) ;
64104 }
65105
66- private async executeRevalidation ( msg : ExtendedQueueMessage ) {
106+ private async executeRevalidation ( msg : QueueMessage ) {
67107 try {
108+ debug ( `Revalidating ${ msg . MessageBody . host } ${ msg . MessageBody . url } ` ) ;
68109 const {
69110 MessageBody : { host, url } ,
70- previewModeId,
71111 } = msg ;
72112 const protocol = host . includes ( "localhost" ) ? "http" : "https" ;
73113
74- //TODO: handle the different types of errors that can occur during the fetch (i.e. timeout, network error, etc)
75114 const response = await this . service . fetch ( `${ protocol } ://${ host } ${ url } ` , {
76115 method : "HEAD" ,
77116 headers : {
78- "x-prerender-revalidate" : previewModeId ,
117+ // This is defined during build
118+ "x-prerender-revalidate" : process . env . __NEXT_PREVIEW_MODE_ID ! ,
79119 "x-isr" : "1" ,
80120 } ,
81- signal : AbortSignal . timeout ( DEFAULT_REVALIDATION_TIMEOUT_MS ) ,
121+ signal : AbortSignal . timeout ( this . revalidationTimeout ) ,
82122 } ) ;
83123 // Now we need to handle errors from the fetch
84124 if ( response . status === 200 && response . headers . get ( "x-nextjs-cache" ) !== "REVALIDATED" ) {
85- // Something is very wrong here, it means that either the page is not ISR/SSG (and we shouldn't be here) or the `x-prerender-revalidate` header is not correct (and it should not happen either)
125+ this . routeInFailedState . delete ( msg . MessageDeduplicationId ) ;
86126 throw new FatalError (
87127 `The revalidation for ${ host } ${ url } cannot be done. This error should never happen.`
88128 ) ;
89129 } else if ( response . status === 404 ) {
90130 // The page is not found, we should not revalidate it
131+ // We remove the route from the failed state because it might be expected (i.e. a route that was deleted)
132+ this . routeInFailedState . delete ( msg . MessageDeduplicationId ) ;
91133 throw new IgnorableError (
92134 `The revalidation for ${ host } ${ url } cannot be done because the page is not found. It's either expected or an error in user code itself`
93135 ) ;
@@ -100,8 +142,23 @@ export class DurableObjectQueueHandler extends DurableObject<CloudflareEnv> {
100142 } else if ( response . status !== 200 ) {
101143 // TODO: check if we need to handle cloudflare specific status codes/errors
102144 // An unknown error occurred, most likely from something in user code like missing auth in the middleware
145+
146+ // We probably want to retry in this case as well
147+ await this . addToFailedState ( msg ) ;
148+
103149 throw new RecoverableError ( `An unknown error occurred while revalidating ${ host } ${ url } ` ) ;
104150 }
151+ // Everything went well, we can update the sync table
152+ // We use unixepoch here,it also works with Date.now()/1000, but not with Date.now() alone.
153+ // TODO: This needs to be investigated
154+ this . sql . exec (
155+ "INSERT OR REPLACE INTO sync (id, lastSuccess, buildId) VALUES (?, unixepoch(), ?)" ,
156+ // We cannot use the deduplication id because it's not unique per route - every time a route is revalidated, the deduplication id is different.
157+ `${ host } ${ url } ` ,
158+ process . env . __NEXT_BUILD_ID
159+ ) ;
160+ // If everything went well, we can remove the route from the failed state
161+ this . routeInFailedState . delete ( msg . MessageDeduplicationId ) ;
105162 } catch ( e ) {
106163 // Do we want to propagate the error to the calling worker?
107164 if ( ! isOpenNextError ( e ) ) {
@@ -125,36 +182,47 @@ export class DurableObjectQueueHandler extends DurableObject<CloudflareEnv> {
125182 ) ;
126183 const allEventsToRetry = nextEventToRetry ? [ nextEventToRetry , ...expiredEvents ] : expiredEvents ;
127184 for ( const event of allEventsToRetry ) {
185+ debug ( `Retrying revalidation for ${ event . msg . MessageBody . host } ${ event . msg . MessageBody . url } ` ) ;
128186 await this . executeRevalidation ( event . msg ) ;
129- this . routeInFailedState . delete ( event . msg . MessageDeduplicationId ) ;
130187 }
131188 }
132189
/**
 * Records a failed revalidation, both in memory and in the `failed_state` table,
 * and makes sure an alarm is scheduled to retry it.
 *
 * The first failure is retried after one retry interval; each later failure
 * doubles the delay (`2^(retryCount + 1) * revalidationRetryInterval`). Once the
 * retry count reaches `maxRevalidationAttempts`, the route is dropped from both
 * the in-memory map and the table, and no further retry is scheduled.
 *
 * @param msg The queue message whose revalidation failed.
 */
async addToFailedState(msg: QueueMessage): Promise<void> {
  debug(`Adding ${msg.MessageBody.host}${msg.MessageBody.url} to the failed state`);
  const existingFailedState = this.routeInFailedState.get(msg.MessageDeduplicationId);

  let updatedFailedState: FailedState;

  if (existingFailedState) {
    if (existingFailedState.retryCount >= this.maxRevalidationAttempts) {
      // We give up once the configured number of retries is reached and log the error
      error(
        `The revalidation for ${msg.MessageBody.host}${msg.MessageBody.url} has failed after ${this.maxRevalidationAttempts} retries. It will not be tried again, but subsequent ISR requests will retry.`
      );
      this.routeInFailedState.delete(msg.MessageDeduplicationId);
      // Also drop the persisted row, otherwise the abandoned route is restored on the next start
      this.sql.exec("DELETE FROM failed_state WHERE id = ?", msg.MessageDeduplicationId);
      return;
    }
    const nextAlarmMs =
      Date.now() + Math.pow(2, existingFailedState.retryCount + 1) * this.revalidationRetryInterval;
    updatedFailedState = {
      ...existingFailedState,
      retryCount: existingFailedState.retryCount + 1,
      nextAlarmMs,
    };
  } else {
    updatedFailedState = {
      msg,
      retryCount: 1,
      // The first retry honours the configured interval
      nextAlarmMs: Date.now() + this.revalidationRetryInterval,
    };
  }
  this.routeInFailedState.set(msg.MessageDeduplicationId, updatedFailedState);
  this.sql.exec(
    "INSERT OR REPLACE INTO failed_state (id, data, buildId) VALUES (?, ?, ?)",
    msg.MessageDeduplicationId,
    JSON.stringify(updatedFailedState),
    process.env.__NEXT_BUILD_ID
  );
  // We probably want to do something if routeInFailedState is becoming too big, at least log it
  await this.addAlarm();
}
@@ -164,9 +232,60 @@ export class DurableObjectQueueHandler extends DurableObject<CloudflareEnv> {
164232 if ( existingAlarm ) return ;
165233 if ( this . routeInFailedState . size === 0 ) return ;
166234
167- const nextAlarmToSetup = Math . min (
235+ let nextAlarmToSetup = Math . min (
168236 ...Array . from ( this . routeInFailedState . values ( ) ) . map ( ( { nextAlarmMs } ) => nextAlarmMs )
169237 ) ;
238+ if ( nextAlarmToSetup < Date . now ( ) ) {
239+ // We don't want to set an alarm in the past
240+ nextAlarmToSetup = Date . now ( ) + this . revalidationRetryInterval ;
241+ }
170242 await this . ctx . storage . setAlarm ( nextAlarmToSetup ) ;
171243 }
244+
/**
 * Restores the state of the durable object from its SQL storage.
 *
 * Creates the `failed_state` and `sync` tables when missing, removes rows written
 * by another build, reloads the failed revalidations into memory and finally
 * re-arms the retry alarm. Ongoing revalidations are not restored because we
 * cannot know in which state they are.
 *
 * Rows whose payload is not valid JSON, or does not look like a failed state, are
 * logged and deleted instead of aborting the restoration.
 */
async initState(): Promise<void> {
  // We store the failed state as a blob, we don't want to do anything with it anyway besides restoring
  this.sql.exec("CREATE TABLE IF NOT EXISTS failed_state (id TEXT PRIMARY KEY, data TEXT, buildId TEXT)");

  // We create the sync table to handle eventually consistent incremental cache
  this.sql.exec("CREATE TABLE IF NOT EXISTS sync (id TEXT PRIMARY KEY, lastSuccess INTEGER, buildId TEXT)");

  // Before doing anything else, we clear the DB for any potential old data
  this.sql.exec("DELETE FROM failed_state WHERE buildId != ?", process.env.__NEXT_BUILD_ID);
  this.sql.exec("DELETE FROM sync WHERE buildId != ?", process.env.__NEXT_BUILD_ID);

  // Minimal structural check of a persisted entry before trusting it
  const isFailedState = (value: unknown): value is FailedState => {
    if (typeof value !== "object" || value === null) return false;
    const candidate = value as Partial<FailedState>;
    return (
      typeof candidate.msg === "object" &&
      candidate.msg !== null &&
      typeof candidate.retryCount === "number" &&
      typeof candidate.nextAlarmMs === "number"
    );
  };

  const failedStateCursor = this.sql.exec<{ id: string; data: string }>("SELECT id, data FROM failed_state");
  const corruptedIds: string[] = [];
  for (const row of failedStateCursor) {
    let restored: FailedState | undefined;
    try {
      const parsed: unknown = JSON.parse(row.data);
      if (isFailedState(parsed)) restored = parsed;
    } catch {
      // Invalid JSON: handled below like any other unusable row
    }
    if (restored) {
      this.routeInFailedState.set(row.id, restored);
    } else {
      error(`Discarding the corrupted failed state entry ${row.id}`);
      corruptedIds.push(row.id);
    }
  }
  // Deletions are done once the cursor is fully consumed, so the table is not modified while it is read
  for (const id of corruptedIds) {
    this.sql.exec("DELETE FROM failed_state WHERE id = ?", id);
  }

  // Now that we have restored the failed state, we can restore the alarm as well
  await this.addAlarm();
}
267+
/**
 * Looks up the `sync` table to find out whether the route targeted by the message
 * was successfully revalidated after the `lastModified` carried by the message.
 *
 * `lastSuccess` is stored in seconds (SQLite `unixepoch()`), so `lastModified`
 * is converted from milliseconds before being compared.
 *
 * @param msg The queue message describing the route to revalidate.
 * @returns `true` if the route has been revalidated since the lastModified from the message, `false` otherwise
 *          (including when the lookup itself fails).
 */
checkSyncTable(msg: QueueMessage): boolean {
  try {
    // An aggregate without GROUP BY always yields exactly one row, so `one()` is safe here
    const { numNewer } = this.sql
      .exec<{ numNewer: number }>(
        "SELECT COUNT(*) as numNewer FROM sync WHERE id = ? AND lastSuccess > ?",
        `${msg.MessageBody.host}${msg.MessageBody.url}`,
        Math.round(msg.MessageBody.lastModified / 1000)
      )
      .one();

    return numNewer > 0;
  } catch (e: unknown) {
    // Best effort: when the lookup fails we fall back to revalidating, but leave a trace of the failure
    debug(`Unable to read the sync table for ${msg.MessageBody.host}${msg.MessageBody.url}`, e);
    return false;
  }
}
172291}
0 commit comments