Skip to content

Commit 9348599

Browse files
committed
fix: prevent stale-relay concurrent refresh from orphaning relay promises
1 parent 4fa0ac3 commit 9348599

File tree

2 files changed

+185
-140
lines changed

2 files changed

+185
-140
lines changed

packages/octez.connect-transport-matrix/__tests__/communication-client/P2PCommunicationClient.test.ts

Lines changed: 154 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -5,35 +5,8 @@ jest.mock('axios')
55
jest.mock('@tezos-x/octez.connect-utils', () => {
66
const actual = jest.requireActual('@tezos-x/octez.connect-utils')
77

8-
class ExposedPromise<T> {
9-
public promise: Promise<T>
10-
private _resolve!: (value: T) => void
11-
private _reject!: (reason?: any) => void
12-
13-
constructor() {
14-
this.promise = new Promise<T>((res, rej) => {
15-
this._resolve = res
16-
this._reject = rej
17-
})
18-
// Prevent Node.js from throwing unhandled promise rejection errors during tests
19-
// when the ExposedPromise is rejected before any caller awaits it,
20-
// which can occur in error recovery paths
21-
this.promise.catch(() => {})
22-
}
23-
resolve(value: T) {
24-
this._resolve(value)
25-
}
26-
reject(reason?: any) {
27-
this._reject(reason)
28-
}
29-
isResolved(): boolean {
30-
return true
31-
}
32-
}
33-
348
return {
359
...actual,
36-
ExposedPromise,
3710
generateGUID: jest.fn(),
3811
getHexHash: jest.fn(),
3912
recipientString: jest.fn(),
@@ -54,6 +27,7 @@ jest.mock('../../src/matrix-client/MatrixClient', () => ({
5427
// Imports
5528
import axios from 'axios'
5629
import {
30+
ExposedPromise,
5731
generateGUID,
5832
getHexHash,
5933
recipientString,
@@ -168,161 +142,211 @@ describe('P2PCommunicationClient', () => {
168142

169143
describe('getRelayServer (dead node recovery)', () => {
170144
let freshClient: P2PCommunicationClient
145+
type RelayServerRecord = { server: string; timestamp: number; localTimestamp: number }
146+
type Deferred<T> = {
147+
promise: Promise<T>
148+
resolve: (value: T) => void
149+
reject: (reason?: unknown) => void
150+
}
171151

172152
beforeEach(() => {
173153
freshClient = new P2PCommunicationClient('MyApp', fakeKeyPair as any, 2, mockStorage as any)
174154
})
175155

176-
it('falls through to server discovery when stored node is unreachable', async () => {
177-
// Stored node exists but is dead
178-
mockStorage.get.mockResolvedValue('dead-node.papers.tech')
156+
const createDeferred = <T>(): Deferred<T> => {
157+
let resolve!: (value: T) => void
158+
let reject!: (reason?: unknown) => void
159+
const promise = new Promise<T>((res, rej) => {
160+
resolve = res
161+
reject = rej
162+
})
163+
164+
return { promise, resolve, reject }
165+
}
166+
167+
const setCachedRelay = (relay: RelayServerRecord): void => {
168+
;(freshClient as any).relayServer = ExposedPromise.resolve(relay)
169+
}
170+
171+
it('returns in-memory relay server when cache is fresh', async () => {
172+
const now = Date.now()
173+
setCachedRelay({ server: 'cached-node.papers.tech', timestamp: 1234, localTimestamp: now })
174+
175+
const beaconInfoSpy = jest.spyOn(freshClient, 'getBeaconInfo')
176+
const discoverySpy = jest.spyOn(freshClient as any, 'findBestRegionAndGetServer')
177+
mockStorage.get.mockResolvedValue('should-not-be-read')
178+
179+
const result = await freshClient.getRelayServer()
180+
181+
expect(result).toEqual({ server: 'cached-node.papers.tech', timestamp: 1234 })
182+
expect(beaconInfoSpy).not.toHaveBeenCalled()
183+
expect(discoverySpy).not.toHaveBeenCalled()
184+
expect(mockStorage.get).not.toHaveBeenCalled()
185+
})
186+
187+
it('refreshes stale in-memory relay server when cached server is reachable', async () => {
188+
setCachedRelay({ server: 'cached-node.papers.tech', timestamp: 1000, localTimestamp: 0 })
189+
190+
const beaconInfoSpy = jest
191+
.spyOn(freshClient, 'getBeaconInfo')
192+
.mockResolvedValue({ region: 'eu', known_servers: [], timestamp: 2222 })
193+
const discoverySpy = jest.spyOn(freshClient as any, 'findBestRegionAndGetServer')
194+
195+
const result = await freshClient.getRelayServer()
196+
197+
expect(result).toEqual({ server: 'cached-node.papers.tech', timestamp: 2222 })
198+
expect(beaconInfoSpy).toHaveBeenCalledWith('cached-node.papers.tech')
199+
expect(discoverySpy).not.toHaveBeenCalled()
200+
expect(mockStorage.delete).not.toHaveBeenCalled()
201+
})
202+
203+
it('falls through to discovery when stale cached server is unreachable', async () => {
204+
setCachedRelay({ server: 'cached-node.papers.tech', timestamp: 1000, localTimestamp: 0 })
179205
mockStorage.delete.mockResolvedValue(undefined)
180206
mockStorage.set.mockResolvedValue(undefined)
207+
mockStorage.get.mockResolvedValue('')
181208

182-
// First call (stored node) rejects, second call (discovery probe) succeeds
183-
const axiosGetMock = axios.get as jest.Mock
184-
axiosGetMock.mockRejectedValueOnce(new Error('ECONNREFUSED')).mockResolvedValue({
185-
data: { region: 'eu', known_servers: ['a'], timestamp: 5000 }
209+
const beaconInfoSpy = jest
210+
.spyOn(freshClient, 'getBeaconInfo')
211+
.mockRejectedValueOnce(new Error('ETIMEDOUT'))
212+
const discoverySpy = jest.spyOn(freshClient as any, 'findBestRegionAndGetServer').mockResolvedValue({
213+
server: 'discovered-node.papers.tech',
214+
timestamp: 5000
186215
})
187216

188217
const result = await freshClient.getRelayServer()
189218

190-
// Should have deleted the dead node from storage
219+
expect(result).toEqual({ server: 'discovered-node.papers.tech', timestamp: 5000 })
220+
expect(beaconInfoSpy).toHaveBeenCalledWith('cached-node.papers.tech')
221+
expect(discoverySpy).toHaveBeenCalledTimes(1)
191222
expect(mockStorage.delete).toHaveBeenCalledWith(StorageKey.MATRIX_SELECTED_NODE)
192-
193-
// Should have resolved via discovery
194-
expect(result.server).toBeDefined()
195-
expect(result.timestamp).toBe(5000)
196223
})
197224

198225
it('uses stored node when it is reachable', async () => {
199226
mockStorage.get.mockResolvedValue('healthy-node.papers.tech')
227+
mockStorage.set.mockResolvedValue(undefined)
200228

201-
const axiosGetMock = axios.get as jest.Mock
202-
axiosGetMock.mockResolvedValue({
203-
data: { region: 'eu', known_servers: ['a'], timestamp: 7777 }
204-
})
229+
const beaconInfoSpy = jest
230+
.spyOn(freshClient, 'getBeaconInfo')
231+
.mockResolvedValue({ region: 'eu', known_servers: [], timestamp: 7777 })
232+
const discoverySpy = jest.spyOn(freshClient as any, 'findBestRegionAndGetServer')
205233

206234
const result = await freshClient.getRelayServer()
207235

208-
expect(result.server).toBe('healthy-node.papers.tech')
209-
expect(result.timestamp).toBe(7777)
210-
// Should NOT have deleted the node
236+
expect(result).toEqual({ server: 'healthy-node.papers.tech', timestamp: 7777 })
237+
expect(beaconInfoSpy).toHaveBeenCalledWith('healthy-node.papers.tech')
238+
expect(discoverySpy).not.toHaveBeenCalled()
211239
expect(mockStorage.delete).not.toHaveBeenCalledWith(StorageKey.MATRIX_SELECTED_NODE)
212240
})
213241

214-
it('rejects ExposedPromise when all servers fail, preventing concurrent caller deadlock', async () => {
215-
mockStorage.get.mockResolvedValue('')
242+
it('falls through to discovery when stored node is unreachable', async () => {
243+
mockStorage.get.mockResolvedValue('dead-node.papers.tech')
216244
mockStorage.delete.mockResolvedValue(undefined)
245+
mockStorage.set.mockResolvedValue(undefined)
217246

218-
// All discovery probes fail
219-
const axiosGetMock = axios.get as jest.Mock
220-
axiosGetMock.mockRejectedValue(new Error('ECONNREFUSED'))
247+
const beaconInfoSpy = jest
248+
.spyOn(freshClient, 'getBeaconInfo')
249+
.mockRejectedValueOnce(new Error('ECONNREFUSED'))
250+
const discoverySpy = jest.spyOn(freshClient as any, 'findBestRegionAndGetServer').mockResolvedValue({
251+
server: 'discovered-node.papers.tech',
252+
timestamp: 5000
253+
})
221254

222-
await expect(freshClient.getRelayServer()).rejects.toThrow()
255+
const result = await freshClient.getRelayServer()
223256

224-
// The ExposedPromise should be cleared so subsequent callers get a fresh attempt
225-
expect((freshClient as any).relayServer).toBeUndefined()
257+
expect(result).toEqual({ server: 'discovered-node.papers.tech', timestamp: 5000 })
258+
expect(beaconInfoSpy).toHaveBeenCalledWith('dead-node.papers.tech')
259+
expect(discoverySpy).toHaveBeenCalledTimes(1)
260+
expect(mockStorage.delete).toHaveBeenCalledWith(StorageKey.MATRIX_SELECTED_NODE)
226261
})
227262

228-
it('resets and retries when cached relay server becomes unreachable on timestamp refresh', async () => {
229-
mockStorage.get.mockResolvedValue('')
230-
mockStorage.set.mockResolvedValue(undefined)
263+
it('clears in-flight relay promise when discovery fails', async () => {
264+
const storageGet = createDeferred<string>()
265+
mockStorage.get.mockReturnValue(storageGet.promise)
231266
mockStorage.delete.mockResolvedValue(undefined)
232267

233-
// First getRelayServer call: no stored node, discovery finds a server
234-
const axiosGetMock = axios.get as jest.Mock
235-
axiosGetMock.mockResolvedValue({
236-
data: { region: 'eu', known_servers: ['a'], timestamp: 1000 }
268+
jest.spyOn(freshClient as any, 'findBestRegionAndGetServer').mockImplementation(() => {
269+
throw new Error('offline')
237270
})
238271

239-
const firstResult = await freshClient.getRelayServer()
240-
expect(firstResult.timestamp).toBe(1000)
272+
const relayCall = freshClient.getRelayServer()
273+
await Promise.resolve()
274+
const internalRelayPromise = (freshClient as any).relayServer?.promise
275+
internalRelayPromise?.catch(() => undefined)
276+
storageGet.resolve('')
241277

242-
// Force the localTimestamp to be old so the stale-timestamp refresh path triggers
243-
const relayServerPromise = (freshClient as any).relayServer
244-
if (relayServerPromise) {
245-
const resolved = await relayServerPromise.promise
246-
resolved.localTimestamp = 0
247-
}
278+
await expect(relayCall).rejects.toThrow('offline')
248279

249-
// The refresh call to the cached server fails (ETIMEDOUT).
250-
// The recovery path resets state and retries via findBestRegionAndGetServer(),
251-
// which probes all servers. Set up the mock so the first call rejects,
252-
// then all subsequent calls (discovery probes) succeed with a new timestamp.
253-
axiosGetMock
254-
.mockReset()
255-
.mockRejectedValueOnce(new Error('ETIMEDOUT'))
256-
.mockResolvedValue({
257-
data: { region: 'us', known_servers: ['b'], timestamp: 2000 }
258-
})
280+
expect((freshClient as any).relayServer).toBeUndefined()
281+
})
259282

260-
const secondResult = await freshClient.getRelayServer()
283+
it('reuses a single in-flight discovery for concurrent callers', async () => {
284+
mockStorage.get.mockResolvedValue('')
285+
mockStorage.set.mockResolvedValue(undefined)
286+
const discoverySpy = jest.spyOn(freshClient as any, 'findBestRegionAndGetServer').mockResolvedValue({
287+
server: 'discovered-node.papers.tech',
288+
timestamp: 3000
289+
})
261290

262-
expect(secondResult.timestamp).toBe(2000)
263-
expect(mockStorage.delete).toHaveBeenCalledWith(StorageKey.MATRIX_SELECTED_NODE)
291+
const [resultA, resultB] = await Promise.all([freshClient.getRelayServer(), freshClient.getRelayServer()])
292+
293+
expect(resultA).toEqual({ server: 'discovered-node.papers.tech', timestamp: 3000 })
294+
expect(resultB).toEqual({ server: 'discovered-node.papers.tech', timestamp: 3000 })
295+
expect(discoverySpy).toHaveBeenCalledTimes(1)
264296
})
265297

266-
it('handles race condition when multiple callers refresh stale timestamp concurrently', async () => {
298+
it('preserves successful cache when concurrent stale refresh has mixed success/failure', async () => {
299+
setCachedRelay({ server: 'cached-node.papers.tech', timestamp: 1000, localTimestamp: 0 })
267300
mockStorage.get.mockResolvedValue('')
268301
mockStorage.set.mockResolvedValue(undefined)
269302
mockStorage.delete.mockResolvedValue(undefined)
270303

271-
// First getRelayServer call: establish a cached server
272-
const axiosGetMock = axios.get as jest.Mock
273-
axiosGetMock.mockResolvedValue({
274-
data: { region: 'eu', known_servers: ['a'], timestamp: 1000 }
275-
})
304+
let callCount = 0
305+
let resolveSecond!: (value: { region: string; known_servers: string[]; timestamp: number }) => void
306+
let rejectFirst!: (reason?: unknown) => void
276307

277-
await freshClient.getRelayServer()
278-
279-
// Force the localTimestamp to be old so the stale-timestamp refresh path triggers
280-
const relayServerPromise = (freshClient as any).relayServer
281-
if (relayServerPromise) {
282-
const resolved = await relayServerPromise.promise
283-
resolved.localTimestamp = 0
284-
}
308+
const firstRefresh = new Promise<{ region: string; known_servers: string[]; timestamp: number }>(
309+
(_, reject) => {
310+
rejectFirst = reject
311+
}
312+
)
313+
const secondRefresh = new Promise<{ region: string; known_servers: string[]; timestamp: number }>(
314+
(resolve) => {
315+
resolveSecond = resolve
316+
}
317+
)
285318

286-
// Both concurrent callers will fail their getBeaconInfo calls
287-
// The fix ensures callers don't reset this.relayServer if another caller
288-
// has already created a new promise instance for retry, preventing orphaned promises
289-
let firstCallReachedCatch = false
290-
let secondCallReachedCatch = false
291-
292-
axiosGetMock.mockReset()
293-
294-
// Create a controlled delay to ensure interleaving
295-
axiosGetMock.mockImplementation(() => {
296-
return new Promise((resolve, reject) => {
297-
setTimeout(() => {
298-
reject(new Error('ETIMEDOUT'))
299-
}, 10)
300-
})
319+
jest.spyOn(freshClient, 'getBeaconInfo').mockImplementation(async () => {
320+
callCount += 1
321+
if (callCount === 1) {
322+
return firstRefresh
323+
}
324+
if (callCount === 2) {
325+
return secondRefresh
326+
}
327+
throw new Error(`Unexpected getBeaconInfo call #${callCount}`)
301328
})
302329

303-
// Start two concurrent calls
304-
const call1 = freshClient.getRelayServer().catch(() => {
305-
firstCallReachedCatch = true
306-
})
307-
const call2 = freshClient.getRelayServer().catch(() => {
308-
secondCallReachedCatch = true
309-
})
330+
const discoverySpy = jest
331+
.spyOn(freshClient as any, 'findBestRegionAndGetServer')
332+
.mockResolvedValue({ server: 'unexpected-discovery.papers.tech', timestamp: 9999 })
310333

311-
// After both fail, set up successful discovery
312-
await Promise.allSettled([call1, call2])
334+
const call1 = freshClient.getRelayServer().catch((error) => error)
335+
const call2 = freshClient.getRelayServer()
336+
await Promise.resolve()
337+
const internalRelayPromise = (freshClient as any).relayServer?.promise
338+
internalRelayPromise?.catch(() => undefined)
313339

314-
axiosGetMock.mockResolvedValue({
315-
data: { region: 'us', known_servers: ['b'], timestamp: 3000 }
316-
})
340+
resolveSecond({ region: 'us', known_servers: [], timestamp: 2000 })
341+
const secondResult = await call2
342+
expect(secondResult).toEqual({ server: 'cached-node.papers.tech', timestamp: 2000 })
317343

318-
// The third call should succeed without hanging (proving no orphaned promises)
319-
const thirdResult = await freshClient.getRelayServer()
344+
rejectFirst(new Error('stale refresh failed in one caller'))
345+
await call1
320346

321-
expect(thirdResult.server).toBeDefined()
322-
expect(thirdResult.timestamp).toBe(3000)
323-
expect(firstCallReachedCatch || secondCallReachedCatch).toBe(true)
324-
// Both failed calls should have attempted cleanup
325-
expect(mockStorage.delete).toHaveBeenCalledWith(StorageKey.MATRIX_SELECTED_NODE)
347+
const thirdResult = await freshClient.getRelayServer()
348+
expect(thirdResult).toEqual({ server: 'cached-node.papers.tech', timestamp: 2000 })
349+
expect(discoverySpy).not.toHaveBeenCalled()
326350
})
327351
})
328352

0 commit comments

Comments
 (0)