diff --git a/server/lib/matrix-utils/create-retry-fn-if-not-joined.js b/server/lib/matrix-utils/create-retry-fn-if-not-joined.js new file mode 100644 index 0000000..a96fa4e --- /dev/null +++ b/server/lib/matrix-utils/create-retry-fn-if-not-joined.js @@ -0,0 +1,92 @@ +'use strict'; + +const assert = require('assert'); + +const { HTTPResponseError } = require('../fetch-endpoint'); +const ensureRoomJoined = require('./ensure-room-joined'); + +const JOIN_STATES = { + unknown: 'unknown', + joining: 'joining', + joined: 'joined', + failed: 'failed', +}; +const joinStateValues = Object.values(JOIN_STATES); + +// Optimistically use the Matrix API assuming you're already joined to the room or +// accessing a `world_readable` room that doesn't require joining. If we see a 403 +// Forbidden, then try joining the room and retrying the API call. +// +// Usage: Call this once to first create a helper utility that will retry a given +// function appropriately. +function createRetryFnIfNotJoined( + accessToken, + roomIdOrAlias, + { viaServers = new Set(), abortSignal } = {} +) { + assert(accessToken); + assert(roomIdOrAlias); + // We use a `Set` to ensure that we don't have duplicate servers in the list + assert(viaServers instanceof Set); + + let joinState = JOIN_STATES.unknown; + let joinPromise = null; + + return async function retryFnIfNotJoined(fn) { + assert( + joinStateValues.includes(joinState), + `Unexpected internal join state when using createRetryFnIfNotJoined(...) (joinState=${joinState}). ` + + `This is a bug in the Matrix Public Archive. Please report` + ); + + if (joinState === JOIN_STATES.joining) { + // Wait for the join to finish before trying + await joinPromise; + } else if (joinState === JOIN_STATES.failed) { + // If we failed to join the room, then there is no way any other call is going + // to succeed so just immediately return an error. We return `joinPromise` + // which will resolve to the join error that occured + return joinPromise; + } + + try { + return await Promise.resolve(fn()); + } catch (errFromFn) { + const isForbiddenError = + errFromFn instanceof HTTPResponseError && errFromFn.response.status === 403; + + // If we're in the middle of joining, try again + if (joinState === JOIN_STATES.joining) { + return await retryFnIfNotJoined(fn); + } + // Try joining the room if we see a 403 Forbidden error as we may just not + // be part of the room yet. We can't distinguish between a room that has + // banned us vs a room we haven't joined yet so we just try joining the + // room in any case. + else if ( + isForbiddenError && + // Only try joining if we haven't tried joining yet + joinState === JOIN_STATES.unknown + ) { + joinState = JOIN_STATES.joining; + joinPromise = ensureRoomJoined(accessToken, roomIdOrAlias, { + viaServers, + abortSignal, + }); + + try { + await joinPromise; + joinState = JOIN_STATES.joined; + return await retryFnIfNotJoined(fn); + } catch (err) { + joinState = JOIN_STATES.failed; + throw err; + } + } + + throw errFromFn; + } + }; +} + +module.exports = createRetryFnIfNotJoined; diff --git a/server/lib/matrix-utils/ensure-room-joined.js b/server/lib/matrix-utils/ensure-room-joined.js index f192d76..02474c9 100644 --- a/server/lib/matrix-utils/ensure-room-joined.js +++ b/server/lib/matrix-utils/ensure-room-joined.js @@ -21,6 +21,8 @@ async function ensureRoomJoined( roomIdOrAlias, { viaServers = new Set(), abortSignal } = {} ) { + assert(accessToken); + assert(roomIdOrAlias); // We use a `Set` to ensure that we don't have duplicate servers in the list assert(viaServers instanceof Set); diff --git a/server/lib/matrix-utils/fetch-public-rooms.js b/server/lib/matrix-utils/fetch-public-rooms.js index bc9e7c5..165f9c5 100644 --- a/server/lib/matrix-utils/fetch-public-rooms.js +++ b/server/lib/matrix-utils/fetch-public-rooms.js @@ -40,11 +40,12 @@ async function fetchPublicRooms( abortSignal, }); - // We only want to see public rooms in the archive + // We only want to see public or world_readable rooms in the archive. A room can be + // world_readable without being public. For example someone might have an invite only + // room where only privileged users are allowed to join and talk but anyone can view + // the room. const accessibleRooms = publicRoomsRes.chunk.filter((room) => { - // `room.world_readable` is also accessible here but we only use history - // `world_readable` to determine search indexing. - return room.join_rule === 'public'; + return room.world_readable || room.join_rule === 'public'; }); return { diff --git a/server/lib/matrix-utils/fetch-room-data.js b/server/lib/matrix-utils/fetch-room-data.js index 91c702e..074fa39 100644 --- a/server/lib/matrix-utils/fetch-room-data.js +++ b/server/lib/matrix-utils/fetch-room-data.js @@ -3,7 +3,7 @@ const assert = require('assert'); const urlJoin = require('url-join'); -const { fetchEndpointAsJson } = require('../fetch-endpoint'); +const { HTTPResponseError, fetchEndpointAsJson } = require('../fetch-endpoint'); const parseViaServersFromUserInput = require('../parse-via-servers-from-user-input'); const { traceFunction } = require('../../tracing/trace-utilities'); @@ -20,13 +20,43 @@ function getStateEndpointForRoomIdAndEventType(roomId, eventType) { ); } +const fetchPotentiallyMissingStateEvent = traceFunction(async function ({ + accessToken, + roomId, + stateEventType, + abortSignal, +} = {}) { + assert(accessToken); + assert(roomId); + assert(stateEventType); + + try { + const { data } = await fetchEndpointAsJson( + getStateEndpointForRoomIdAndEventType(roomId, stateEventType), + { + accessToken, + abortSignal, + } + ); + return data; + } catch (err) { + const is404Error = err instanceof HTTPResponseError && err.response.status === 404; + + // Ignore 404 errors, because it just means that the room doesn't have that state + // event (which is normal). + if (!is404Error) { + throw err; + } + } +}); + // Unfortunately, we can't just get the event ID from the `/state?format=event` // endpoint, so we have to do this trick. Related to // https://github.com/matrix-org/synapse/issues/15454 // // TODO: Remove this when we have MSC3999 (because it's the only usage) const removeMe_fetchRoomCreateEventId = traceFunction(async function ( - matrixAccessToken, + accessToken, roomId, { abortSignal } = {} ) { @@ -36,7 +66,7 @@ const removeMe_fetchRoomCreateEventId = traceFunction(async function ( `_matrix/client/r0/rooms/${encodeURIComponent(roomId)}/messages?dir=f&limit1` ), { - accessToken: matrixAccessToken, + accessToken, abortSignal, } ); @@ -51,22 +81,16 @@ const fetchRoomCreationInfo = traceFunction(async function ( roomId, { abortSignal } = {} ) { - const [stateCreateResDataOutcome] = await Promise.allSettled([ - fetchEndpointAsJson(getStateEndpointForRoomIdAndEventType(roomId, 'm.room.create'), { - accessToken: matrixAccessToken, - abortSignal, - }), - ]); + const stateCreateResData = await fetchPotentiallyMissingStateEvent({ + accessToken: matrixAccessToken, + roomId, + stateEventType: 'm.room.create', + abortSignal, + }); - let roomCreationTs; - let predecessorRoomId; - let predecessorLastKnownEventId; - if (stateCreateResDataOutcome.reason === undefined) { - const { data } = stateCreateResDataOutcome.value; - roomCreationTs = data?.origin_server_ts; - predecessorLastKnownEventId = data?.content?.event_id; - predecessorRoomId = data?.content?.predecessor?.room_id; - } + const roomCreationTs = stateCreateResData?.origin_server_ts; + const predecessorRoomId = stateCreateResData?.content?.predecessor?.room_id; + const predecessorLastKnownEventId = stateCreateResData?.content?.event_id; return { roomCreationTs, predecessorRoomId, predecessorLastKnownEventId }; }); @@ -76,33 +100,33 @@ const fetchPredecessorInfo = traceFunction(async function ( roomId, { abortSignal } = {} ) { - const [roomCreationInfoOutcome, statePredecessorResDataOutcome] = await Promise.allSettled([ + const [roomCreationInfo, statePredecessorResData] = await Promise.all([ fetchRoomCreationInfo(matrixAccessToken, roomId, { abortSignal }), - fetchEndpointAsJson( - getStateEndpointForRoomIdAndEventType(roomId, 'org.matrix.msc3946.room_predecessor'), - { - accessToken: matrixAccessToken, - abortSignal, - } - ), + fetchPotentiallyMissingStateEvent({ + accessToken: matrixAccessToken, + roomId, + stateEventType: 'org.matrix.msc3946.room_predecessor', + abortSignal, + }), ]); let predecessorRoomId; let predecessorLastKnownEventId; let predecessorViaServers; // Prefer the dynamic predecessor from the dedicated state event - if (statePredecessorResDataOutcome.reason === undefined) { - const { data } = statePredecessorResDataOutcome.value; - predecessorRoomId = data?.content?.predecessor_room_id; - predecessorLastKnownEventId = data?.content?.last_known_event_id; - predecessorViaServers = parseViaServersFromUserInput(data?.content?.via_servers); + if (statePredecessorResData) { + predecessorRoomId = statePredecessorResData?.content?.predecessor_room_id; + predecessorLastKnownEventId = statePredecessorResData?.content?.last_known_event_id; + predecessorViaServers = parseViaServersFromUserInput( + statePredecessorResData?.content?.via_servers + ); } // Then fallback to the predecessor defined by the room creation event - else if (roomCreationInfoOutcome.reason === undefined) { - ({ predecessorRoomId, predecessorLastKnownEventId } = roomCreationInfoOutcome.value); + else if (roomCreationInfo) { + ({ predecessorRoomId, predecessorLastKnownEventId } = roomCreationInfo); } - const { roomCreationTs: currentRoomCreationTs } = roomCreationInfoOutcome; + const { roomCreationTs: currentRoomCreationTs } = roomCreationInfo; return { // This is prefixed with "current" so we don't get this confused with the @@ -119,20 +143,15 @@ const fetchSuccessorInfo = traceFunction(async function ( roomId, { abortSignal } = {} ) { - const [stateTombstoneResDataOutcome] = await Promise.allSettled([ - fetchEndpointAsJson(getStateEndpointForRoomIdAndEventType(roomId, 'm.room.tombstone'), { - accessToken: matrixAccessToken, - abortSignal, - }), - ]); + const stateTombstoneResData = await fetchPotentiallyMissingStateEvent({ + accessToken: matrixAccessToken, + roomId, + stateEventType: 'm.room.tombstone', + abortSignal, + }); - let successorRoomId; - let successorSetTs; - if (stateTombstoneResDataOutcome.reason === undefined) { - const { data } = stateTombstoneResDataOutcome.value; - successorRoomId = data?.content?.replacement_room; - successorSetTs = data?.origin_server_ts; - } + const successorRoomId = stateTombstoneResData?.content?.replacement_room; + const successorSetTs = stateTombstoneResData?.origin_server_ts; return { successorRoomId, @@ -150,99 +169,70 @@ const fetchRoomData = traceFunction(async function ( assert(roomId); const [ - stateNameResDataOutcome, - stateTopicResDataOutcome, - stateCanonicalAliasResDataOutcome, - stateAvatarResDataOutcome, - stateHistoryVisibilityResDataOutcome, - stateJoinRulesResDataOutcome, - predecessorInfoOutcome, - successorInfoOutcome, - ] = await Promise.allSettled([ - fetchEndpointAsJson(getStateEndpointForRoomIdAndEventType(roomId, 'm.room.name'), { + stateNameResData, + stateTopicResData, + stateCanonicalAliasResData, + stateAvatarResData, + stateHistoryVisibilityResData, + stateJoinRulesResData, + predecessorInfo, + successorInfo, + ] = await Promise.all([ + fetchPotentiallyMissingStateEvent({ accessToken: matrixAccessToken, + roomId, + stateEventType: 'm.room.name', abortSignal, }), - fetchEndpointAsJson(getStateEndpointForRoomIdAndEventType(roomId, 'm.room.topic'), { + fetchPotentiallyMissingStateEvent({ accessToken: matrixAccessToken, + roomId, + stateEventType: 'm.room.topic', abortSignal, }), - fetchEndpointAsJson(getStateEndpointForRoomIdAndEventType(roomId, 'm.room.canonical_alias'), { + fetchPotentiallyMissingStateEvent({ accessToken: matrixAccessToken, + roomId, + stateEventType: 'm.room.canonical_alias', abortSignal, }), - fetchEndpointAsJson(getStateEndpointForRoomIdAndEventType(roomId, 'm.room.avatar'), { + fetchPotentiallyMissingStateEvent({ accessToken: matrixAccessToken, + roomId, + stateEventType: 'm.room.avatar', abortSignal, }), - fetchEndpointAsJson( - getStateEndpointForRoomIdAndEventType(roomId, 'm.room.history_visibility'), - { - accessToken: matrixAccessToken, - abortSignal, - } - ), - fetchEndpointAsJson(getStateEndpointForRoomIdAndEventType(roomId, 'm.room.join_rules'), { + fetchPotentiallyMissingStateEvent({ accessToken: matrixAccessToken, + roomId, + stateEventType: 'm.room.history_visibility', + abortSignal, + }), + fetchPotentiallyMissingStateEvent({ + accessToken: matrixAccessToken, + roomId, + stateEventType: 'm.room.join_rules', abortSignal, }), fetchPredecessorInfo(matrixAccessToken, roomId, { abortSignal }), fetchSuccessorInfo(matrixAccessToken, roomId, { abortSignal }), ]); - let name; - if (stateNameResDataOutcome.reason === undefined) { - const { data } = stateNameResDataOutcome.value; - name = data?.content?.name; - } - - let canonicalAlias; - if (stateCanonicalAliasResDataOutcome.reason === undefined) { - const { data } = stateCanonicalAliasResDataOutcome.value; - canonicalAlias = data?.content?.alias; - } - - let topic; - if (stateTopicResDataOutcome.reason === undefined) { - const { data } = stateTopicResDataOutcome.value; - topic = data?.content?.topic; - } - - let avatarUrl; - if (stateAvatarResDataOutcome.reason === undefined) { - const { data } = stateAvatarResDataOutcome.value; - avatarUrl = data?.content?.url; - } + let name = stateNameResData?.content?.name; + let canonicalAlias = stateCanonicalAliasResData?.content?.alias; + let topic = stateTopicResData?.content?.topic; + let avatarUrl = stateAvatarResData?.content?.url; + let historyVisibility = stateHistoryVisibilityResData?.content?.history_visibility; + let joinRule = stateJoinRulesResData?.content?.join_rule; - let historyVisibility; - if (stateHistoryVisibilityResDataOutcome.reason === undefined) { - const { data } = stateHistoryVisibilityResDataOutcome.value; - historyVisibility = data?.content?.history_visibility; - } - - let joinRule; - if (stateJoinRulesResDataOutcome.reason === undefined) { - const { data } = stateJoinRulesResDataOutcome.value; - joinRule = data?.content?.join_rule; - } + const { + currentRoomCreationTs: roomCreationTs, + predecessorRoomId, + predecessorLastKnownEventId, + predecessorViaServers, + } = predecessorInfo; - let roomCreationTs; - let predecessorRoomId; - let predecessorLastKnownEventId; - let predecessorViaServers; - if (predecessorInfoOutcome.reason === undefined) { - ({ - currentRoomCreationTs: roomCreationTs, - predecessorRoomId, - predecessorLastKnownEventId, - predecessorViaServers, - } = predecessorInfoOutcome.value); - } - let successorRoomId; - let successorSetTs; - if (successorInfoOutcome.reason === undefined) { - ({ successorRoomId, successorSetTs } = successorInfoOutcome.value); - } + const { successorRoomId, successorSetTs } = successorInfo; return { id: roomId, diff --git a/server/lib/matrix-utils/resolve-room-alias.js b/server/lib/matrix-utils/resolve-room-alias.js new file mode 100644 index 0000000..d5d45bb --- /dev/null +++ b/server/lib/matrix-utils/resolve-room-alias.js @@ -0,0 +1,33 @@ +'use strict'; + +const assert = require('assert'); +const urlJoin = require('url-join'); + +const { fetchEndpointAsJson } = require('../fetch-endpoint'); +const { traceFunction } = require('../../tracing/trace-utilities'); + +const config = require('../config'); +const matrixServerUrl = config.get('matrixServerUrl'); +assert(matrixServerUrl); + +async function resolveRoomAlias({ accessToken, roomAlias, abortSignal }) { + assert(accessToken); + assert(roomAlias); + + // GET /_matrix/client/r0/directory/room/{roomAlias} -> { room_id, servers } + const resolveRoomAliasEndpoint = urlJoin( + matrixServerUrl, + `_matrix/client/r0/directory/room/${encodeURIComponent(roomAlias)}` + ); + const { data: resolveRoomAliasResData } = await fetchEndpointAsJson(resolveRoomAliasEndpoint, { + accessToken, + abortSignal, + }); + + return { + roomId: resolveRoomAliasResData.room_id, + viaServers: new Set(resolveRoomAliasResData.servers || []), + }; +} + +module.exports = traceFunction(resolveRoomAlias); diff --git a/server/lib/matrix-utils/resolve-room-id-or-alias.js b/server/lib/matrix-utils/resolve-room-id-or-alias.js new file mode 100644 index 0000000..ffece54 --- /dev/null +++ b/server/lib/matrix-utils/resolve-room-id-or-alias.js @@ -0,0 +1,40 @@ +'use strict'; + +const { + VALID_ENTITY_DESCRIPTOR_TO_SIGIL_MAP, +} = require('matrix-public-archive-shared/lib/reference-values'); +const resolveRoomAlias = require('./resolve-room-alias'); + +// Given a room ID or alias, return the room ID and the set of servers we should try to +// join from. Does not attempt to join the room. +async function resolveRoomIdOrAlias({ + accessToken, + roomIdOrAlias, + viaServers = new Set(), + abortSignal, +} = {}) { + const isRoomId = roomIdOrAlias.startsWith(VALID_ENTITY_DESCRIPTOR_TO_SIGIL_MAP.roomid); + const isRoomAlias = roomIdOrAlias.startsWith(VALID_ENTITY_DESCRIPTOR_TO_SIGIL_MAP.r); + + if (isRoomId) { + const roomId = roomIdOrAlias; + return { roomId, viaServers }; + } else if (isRoomAlias) { + const roomAlias = roomIdOrAlias; + + const { roomId, viaServers: moreViaServers } = await resolveRoomAlias({ + accessToken, + roomAlias, + abortSignal: abortSignal, + }); + return { roomId, viaServers: new Set([...viaServers, ...moreViaServers]) }; + } + + throw new Error( + `resolveRoomIdOrAlias: Unknown roomIdOrAlias=${roomIdOrAlias} does not start with valid sigil (${Object.values( + VALID_ENTITY_DESCRIPTOR_TO_SIGIL_MAP + )})` + ); +} + +module.exports = resolveRoomIdOrAlias; diff --git a/server/routes/room-routes.js b/server/routes/room-routes.js index c58dd7e..8000025 100644 --- a/server/routes/room-routes.js +++ b/server/routes/room-routes.js @@ -18,7 +18,8 @@ const { fetchSuccessorInfo, } = require('../lib/matrix-utils/fetch-room-data'); const fetchEventsFromTimestampBackwards = require('../lib/matrix-utils/fetch-events-from-timestamp-backwards'); -const ensureRoomJoined = require('../lib/matrix-utils/ensure-room-joined'); +const createRetryFnIfNotJoined = require('../lib/matrix-utils/create-retry-fn-if-not-joined'); +const resolveRoomIdOrAlias = require('../lib/matrix-utils/resolve-room-id-or-alias'); const timestampToEvent = require('../lib/matrix-utils/timestamp-to-event'); const { removeMe_fetchRoomCreateEventId } = require('../lib/matrix-utils/fetch-room-data'); const getMessagesResponseFromEventId = require('../lib/matrix-utils/get-messages-response-from-event-id'); @@ -179,21 +180,32 @@ router.get( // the time before we join and looking backwards. const dateBeforeJoin = Date.now(); - // We have to wait for the room join to happen first before we can fetch - // any of the additional room info or messages. - const roomId = await ensureRoomJoined(matrixAccessToken, roomIdOrAlias, { - viaServers: parseViaServersFromUserInput(req.query.via), + // Resolve the room ID without joining the room (optimistically assume that we're + // already joined) + let viaServers = parseViaServersFromUserInput(req.query.via); + let roomId; + ({ roomId, viaServers } = await resolveRoomIdOrAlias({ + accessToken: matrixAccessToken, + roomIdOrAlias, + viaServers, + abortSignal: req.abortSignal, + })); + // And then we can always retry after joining if we fail somewhere + const retryFnIfNotJoined = createRetryFnIfNotJoined(matrixAccessToken, roomIdOrAlias, { + viaServers, abortSignal: req.abortSignal, }); // Find the closest day to the current time with messages - const { originServerTs } = await timestampToEvent({ - accessToken: matrixAccessToken, - roomId, - ts: dateBeforeJoin, - direction: DIRECTION.backward, - abortSignal: req.abortSignal, - }); + const { originServerTs } = await retryFnIfNotJoined(() => + timestampToEvent({ + accessToken: matrixAccessToken, + roomId, + ts: dateBeforeJoin, + direction: DIRECTION.backward, + abortSignal: req.abortSignal, + }) + ); if (!originServerTs) { throw new StatusError(404, 'Unable to find day with history'); } @@ -201,8 +213,8 @@ router.get( // Redirect to a day with messages res.redirect( matrixPublicArchiveURLCreator.archiveUrlForDate(roomIdOrAlias, new Date(originServerTs), { - // We can avoid passing along the `via` query parameter because we already - // joined the room above (see `ensureRoomJoined`). + // We can avoid passing along the `viaServers` because our server already knows + // about the room given that we fetched data about it already. // //viaServers: parseViaServersFromUserInput(req.query.via), }) @@ -251,10 +263,18 @@ router.get( `?timelineEndEventId must be a string or undefined but saw ${typeof timelineStartEventId}` ); - // We have to wait for the room join to happen first before we can use the jump to - // date endpoint (or any other Matrix endpoint) - const viaServers = parseViaServersFromUserInput(req.query.via); - const roomId = await ensureRoomJoined(matrixAccessToken, roomIdOrAlias, { + // Resolve the room ID without joining the room (optimistically assume that we're + // already joined) + let viaServers = parseViaServersFromUserInput(req.query.via); + let roomId; + ({ roomId, viaServers } = await resolveRoomIdOrAlias({ + accessToken: matrixAccessToken, + roomIdOrAlias, + viaServers, + abortSignal: req.abortSignal, + })); + // And then we can always retry after joining if we fail somewhere + const retryFnIfNotJoined = createRetryFnIfNotJoined(matrixAccessToken, roomIdOrAlias, { viaServers, abortSignal: req.abortSignal, }); @@ -296,28 +316,32 @@ router.get( // Find the closest event to the given timestamp [{ eventId: eventIdForClosestEvent, originServerTs: tsForClosestEvent }, roomCreateEventId] = await Promise.all([ - timestampToEvent({ - accessToken: matrixAccessToken, - roomId, - ts: ts, - direction: dir, - // Since timestamps are untrusted and can be crafted to make loops in the - // timeline. We use this as a signal to keep progressing from this event - // regardless of what timestamp shenanigans are going on. See MSC3999 - // (https://github.com/matrix-org/matrix-spec-proposals/pull/3999) - // - // TODO: Add tests for timestamp loops once Synapse supports MSC3999. We - // currently just have this set in case some server has this implemented in - // the future but there currently is no implementation (as of 2023-04-17) and - // we can't have passing tests without a server implementation first. - // - // TODO: This isn't implemented yet - fromCausalEventId, - abortSignal: req.abortSignal, - }), - removeMe_fetchRoomCreateEventId(matrixAccessToken, roomId, { - abortSignal: req.abortSignal, - }), + retryFnIfNotJoined(() => + timestampToEvent({ + accessToken: matrixAccessToken, + roomId, + ts: ts, + direction: dir, + // Since timestamps are untrusted and can be crafted to make loops in the + // timeline. We use this as a signal to keep progressing from this event + // regardless of what timestamp shenanigans are going on. See MSC3999 + // (https://github.com/matrix-org/matrix-spec-proposals/pull/3999) + // + // TODO: Add tests for timestamp loops once Synapse supports MSC3999. We + // currently just have this set in case some server has this implemented in + // the future but there currently is no implementation (as of 2023-04-17) and + // we can't have passing tests without a server implementation first. + // + // TODO: This isn't implemented yet + fromCausalEventId, + abortSignal: req.abortSignal, + }) + ), + retryFnIfNotJoined(() => + removeMe_fetchRoomCreateEventId(matrixAccessToken, roomId, { + abortSignal: req.abortSignal, + }) + ), ]); // Without MSC3999, we currently only detect one kind of loop where the @@ -442,14 +466,16 @@ router.get( // for the actual room display that we redirect to from this route. No need for // us go out 100 messages, only for us to go backwards 100 messages again in the // next route. - const messageResData = await getMessagesResponseFromEventId({ - accessToken: matrixAccessToken, - roomId, - eventId: eventIdForClosestEvent, - dir: DIRECTION.forward, - limit: archiveMessageLimit, - abortSignal: req.abortSignal, - }); + const messageResData = await retryFnIfNotJoined(() => + getMessagesResponseFromEventId({ + accessToken: matrixAccessToken, + roomId, + eventId: eventIdForClosestEvent, + dir: DIRECTION.forward, + limit: archiveMessageLimit, + abortSignal: req.abortSignal, + }) + ); if (!messageResData.chunk?.length) { throw new StatusError( @@ -582,9 +608,11 @@ router.get( predecessorRoomId, predecessorLastKnownEventId, predecessorViaServers, - } = await fetchPredecessorInfo(matrixAccessToken, roomId, { - abortSignal: req.abortSignal, - }); + } = await retryFnIfNotJoined(() => + fetchPredecessorInfo(matrixAccessToken, roomId, { + abortSignal: req.abortSignal, + }) + ); if (!predecessorRoomId) { throw new StatusError( @@ -593,18 +621,24 @@ router.get( ); } - // We have to join the predecessor room before we can fetch the successor info - // (this could be our first time seeing the room) - await ensureRoomJoined(matrixAccessToken, predecessorRoomId, { - viaServers, - abortSignal: req.abortSignal, - }); + // We can always retry after joining if we fail somewhere + const retryFnIfNotJoinedToPredecessorRoom = createRetryFnIfNotJoined( + matrixAccessToken, + predecessorRoomId, + { + viaServers, + abortSignal: req.abortSignal, + } + ); + const { successorRoomId: successorRoomIdForPredecessor, successorSetTs: successorSetTsForPredecessor, - } = await fetchSuccessorInfo(matrixAccessToken, predecessorRoomId, { - abortSignal: req.abortSignal, - }); + } = await retryFnIfNotJoinedToPredecessorRoom(() => + fetchSuccessorInfo(matrixAccessToken, predecessorRoomId, { + abortSignal: req.abortSignal, + }) + ); let tombstoneEventId; if (!predecessorLastKnownEventId) { @@ -615,13 +649,15 @@ router.get( // // We just assume this is the tombstone event ID but in any case it gets us to // an event that happened at the same time. - ({ eventId: tombstoneEventId } = await timestampToEvent({ - accessToken: matrixAccessToken, - roomId: predecessorRoomId, - ts: successorSetTsForPredecessor, - direction: DIRECTION.backward, - abortSignal: req.abortSignal, - })); + ({ eventId: tombstoneEventId } = await retryFnIfNotJoinedToPredecessorRoom(() => + timestampToEvent({ + accessToken: matrixAccessToken, + roomId: predecessorRoomId, + ts: successorSetTsForPredecessor, + direction: DIRECTION.backward, + abortSignal: req.abortSignal, + }) + )); } // Try to continue from the tombstone event in the predecessor room because @@ -683,9 +719,11 @@ router.get( ); return; } else if (dir === DIRECTION.forward) { - const { successorRoomId } = await fetchSuccessorInfo(matrixAccessToken, roomId, { - abortSignal: req.abortSignal, - }); + const { successorRoomId } = await retryFnIfNotJoined(() => + fetchSuccessorInfo(matrixAccessToken, roomId, { + abortSignal: req.abortSignal, + }) + ); if (successorRoomId) { // Jump to the successor room and continue at the first event of the room res.redirect( @@ -788,17 +826,18 @@ router.get( ); } - // We have to wait for the room join to happen first before we can fetch - // any of the additional room info or messages. - // - // XXX: It would be better if we just tried fetching first and assume that we are - // already joined and only join after we see a 403 Forbidden error (we should do - // this for all places we `ensureRoomJoined`). But we need the `roomId` for use with - // the various Matrix API's anyway and `/join/{roomIdOrAlias}` -> `{ room_id }` is a - // great way to get it (see - // https://github.com/matrix-org/matrix-public-archive/issues/50). - const viaServers = parseViaServersFromUserInput(req.query.via); - const roomId = await ensureRoomJoined(matrixAccessToken, roomIdOrAlias, { + // Resolve the room ID without joining the room (optimistically assume that we're + // already joined) + let viaServers = parseViaServersFromUserInput(req.query.via); + let roomId; + ({ roomId, viaServers } = await resolveRoomIdOrAlias({ + accessToken: matrixAccessToken, + roomIdOrAlias, + viaServers, + abortSignal: req.abortSignal, + })); + // And then we can always retry after joining if we fail somewhere + const retryFnIfNotJoined = createRetryFnIfNotJoined(matrixAccessToken, roomIdOrAlias, { viaServers, abortSignal: req.abortSignal, }); @@ -806,26 +845,30 @@ router.get( // Do these in parallel to avoid the extra time in sequential round-trips // (we want to display the archive page faster) const [roomData, { events, stateEventMap }] = await Promise.all([ - fetchRoomData(matrixAccessToken, roomId, { abortSignal: req.abortSignal }), + retryFnIfNotJoined(() => + fetchRoomData(matrixAccessToken, roomId, { abortSignal: req.abortSignal }) + ), // We over-fetch messages outside of the range of the given day so that we // can display messages from surrounding days (currently only from days // before) so that the quiet rooms don't feel as desolate and broken. // // When given a bare date like `2022/11/16`, we want to paginate from the end of that // day backwards. This is why we use the `toTimestamp` here and fetch backwards. - fetchEventsFromTimestampBackwards({ - accessToken: matrixAccessToken, - roomId, - ts: toTimestamp, - // We fetch one more than the `archiveMessageLimit` so that we can see if there - // are too many messages from the given day. If we have over the - // `archiveMessageLimit` number of messages fetching from the given day, it's - // acceptable to have them be from surrounding days. But if all 500 messages - // (for example) are from the same day, let's redirect to a smaller hour range - // to display. - limit: archiveMessageLimit + 1, - abortSignal: req.abortSignal, - }), + retryFnIfNotJoined(() => + fetchEventsFromTimestampBackwards({ + accessToken: matrixAccessToken, + roomId, + ts: toTimestamp, + // We fetch one more than the `archiveMessageLimit` so that we can see if there + // are too many messages from the given day. If we have over the + // `archiveMessageLimit` number of messages fetching from the given day, it's + // acceptable to have them be from surrounding days. But if all 500 messages + // (for example) are from the same day, let's redirect to a smaller hour range + // to display. + limit: archiveMessageLimit + 1, + abortSignal: req.abortSignal, + }) + ), ]); // Only `world_readable` or `shared` rooms that are `public` are viewable in the archive @@ -925,8 +968,8 @@ router.get( // We purposely omit `scrollStartEventId` here because the canonical location // for any given event ID is the page it resides on. // - // We can avoid passing along the `viaServers` because we already joined the - // room above (see `ensureRoomJoined`). + // We can avoid passing along the `viaServers` because our server already knows about + // the room given that we fetched data about it already. } ), shouldIndex,