Skip to content

Commit 6509105

Browse files
authored
Merge pull request #8605 from sagemathinc/revert-sticky-routing
Revert sticky routing series (3e0a443..45bcc84).
2 parents 5717f05 + 05a5388 commit 6509105

File tree

10 files changed

+386
-329
lines changed

10 files changed

+386
-329
lines changed

src/packages/backend/conat/test/cluster/cluster-sticky-state.test.ts

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import {
33
after,
44
defaultCluster as servers,
55
waitForConsistentState,
6+
wait,
67
addNodeToDefaultCluster,
78
} from "@cocalc/backend/conat/test/setup";
89
import { STICKY_QUEUE_GROUP } from "@cocalc/conat/core/client";
@@ -45,6 +46,9 @@ describe("ensure sticky state sync and use is working properly", () => {
4546
);
4647
// publishing causes a choice to be made and saved on servers[0]
4748
await clients[0].publish(`subject.${i}.foo`, "hello");
49+
expect(servers[0].sticky[`subject.${i}.*`]).not.toBe(undefined);
50+
// but no choice on servers[1]
51+
expect(servers[1].sticky[`subject.${i}.*`]).toBe(undefined);
4852
}
4953
});
5054

@@ -61,12 +65,51 @@ describe("ensure sticky state sync and use is working properly", () => {
6165
chosen = await Promise.race([p0(), p1()]);
6266
});
6367

68+
it(`sticky on servers[0] should have ${count} entries starting in "subject".`, async () => {
69+
const v = Object.keys(servers[0].sticky).filter((s) =>
70+
s.startsWith("subject."),
71+
);
72+
expect(v.length).toBe(count);
73+
});
74+
75+
it(`sticky on servers[1] should have no entries starting in "subject".`, async () => {
76+
const v = Object.keys(servers[1].sticky).filter((s) =>
77+
s.startsWith("subject."),
78+
);
79+
expect(v.length).toBe(0);
80+
});
81+
82+
it(`servers[1]'s link to servers[0] should *eventually* have ${count} entries starting in "subject."`, async () => {
83+
// @ts-ignore
84+
const link = servers[1].clusterLinksByAddress[servers[0].address()];
85+
let v;
86+
await wait({
87+
until: () => {
88+
v = Object.keys(link.sticky).filter((s) => s.startsWith("subject."));
89+
return v.length == count;
90+
},
91+
});
92+
expect(v.length).toBe(count);
93+
});
94+
6495
it("send message from clients[1] to each subject", async () => {
6596
for (let i = 0; i < count; i++) {
6697
await clients[1].publish(`subject.${i}.foo`);
6798
}
6899
});
69100

101+
// Sometimes this fails under very heavy load.
102+
// It's not a good test, because it probably hits some timeouts sometimes, and
103+
// it is testing internal structure/optimizations, not behavior.
104+
// Note also that minimizing sticky state computation is just an optimization so even if this test were failing
105+
// due to a bug, it might just mean things are slightly slower.
106+
// it(`sticky on servers[1] should STILL have no entries starting in "subject", since no choices had to be made`, async () => {
107+
// const v = Object.keys(servers[1].sticky).filter((s) =>
108+
// s.startsWith("subject."),
109+
// );
110+
// expect(v.length).toBe(0);
111+
// });
112+
70113
async function deliveryTest() {
71114
const sub = chosen == 0 ? subs0[0] : subs1[0];
72115

@@ -111,6 +154,17 @@ describe("ensure sticky state sync and use is working properly", () => {
111154
await waitForConsistentState(servers);
112155
});
113156

157+
it("double check the links have the sticky state", () => {
158+
for (const server of servers.slice(1)) {
159+
// @ts-ignore
160+
const link = server.clusterLinksByAddress[servers[0].address()];
161+
const v = Object.keys(link.sticky).filter((s) =>
162+
s.startsWith("subject."),
163+
);
164+
expect(v.length).toBe(count);
165+
}
166+
});
167+
114168
it(
115169
"in bigger, cluster, publish from every node to subject.0.foo",
116170
deliveryTest,
@@ -126,6 +180,14 @@ describe("ensure sticky state sync and use is working properly", () => {
126180
}
127181
sub.close();
128182
});
183+
184+
it("unjoining servers[0] from servers[1] should transfer the sticky state to servers[1]", async () => {
185+
await servers[1].unjoin({ address: servers[0].address() });
186+
const v = Object.keys(servers[1].sticky).filter((s) =>
187+
s.startsWith("subject."),
188+
);
189+
expect(v.length).toBe(count);
190+
});
129191
});
130192

131193
afterAll(after);

src/packages/backend/conat/test/setup.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ export async function waitForConsistentState(
206206
// now look at everybody else's view of servers[i].
207207
// @ts-ignore
208208
const a = servers[i].interest.serialize().patterns;
209+
const b = servers[i].sticky;
209210
const hashServer = servers[i].hash();
210211
for (let j = 0; j < servers.length; j++) {
211212
if (i != j) {
@@ -219,8 +220,9 @@ export async function waitForConsistentState(
219220
}
220221
const hashLink = link.hash();
221222
const x = link.interest.serialize().patterns;
223+
const y = link.sticky;
222224
const showInfo = () => {
223-
for (const type of ["interest"]) {
225+
for (const type of ["interest", "sticky"]) {
224226
console.log(
225227
`server stream ${type}: `,
226228
hashServer[type],
@@ -252,6 +254,8 @@ export async function waitForConsistentState(
252254
j,
253255
serverInterest: a,
254256
linkInterest: x,
257+
serverSticky: b,
258+
linkSticky: y,
255259
});
256260
};
257261
if (!isEqual(hashServer, hashLink)) {

src/packages/backend/conat/test/sync/open-files.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ to open so they can fulfill their backend responsibilities:
88
99
DEVELOPMENT:
1010
11-
pnpm test `pwd`/open-files.test.ts
11+
pnpm test ./open-files.test.ts
1212
1313
*/
1414

src/packages/conat/core/cluster.ts

Lines changed: 83 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
import { type Client, connect } from "./client";
22
import { Patterns } from "./patterns";
3-
import { updateInterest, type InterestUpdate } from "@cocalc/conat/core/server";
3+
import {
4+
updateInterest,
5+
updateSticky,
6+
type InterestUpdate,
7+
type StickyUpdate,
8+
} from "@cocalc/conat/core/server";
49
import type { DStream } from "@cocalc/conat/sync/dstream";
510
import { once } from "@cocalc/util/async-utils";
611
import { server as createPersistServer } from "@cocalc/conat/persist/server";
@@ -42,12 +47,14 @@ export async function clusterLink(
4247
return link;
4348
}
4449

50+
export type Sticky = { [pattern: string]: { [subject: string]: string } };
4551
export type Interest = Patterns<{ [queue: string]: Set<string> }>;
4652

4753
export { type ClusterLink };
4854

4955
class ClusterLink {
5056
public interest: Interest = new Patterns();
57+
public sticky: Sticky = {};
5158
private streams: ClusterStreams;
5259
private state: "init" | "ready" | "closed" = "init";
5360
private clientStateChanged = Date.now(); // when client status last changed
@@ -78,7 +85,10 @@ class ClusterLink {
7885
clusterName: this.clusterName,
7986
});
8087
for (const update of this.streams.interest.getAll()) {
81-
updateInterest(update, this.interest);
88+
updateInterest(update, this.interest, this.sticky);
89+
}
90+
for (const update of this.streams.sticky.getAll()) {
91+
updateSticky(update, this.sticky);
8292
}
8393
// I have a slight concern about this because updates might not
8494
// arrive in order during automatic failover. That said, maybe
@@ -87,6 +97,7 @@ class ClusterLink {
8797
// it is about, and when that server goes down none of this state
8898
// matters anymore.
8999
this.streams.interest.on("change", this.handleInterestUpdate);
100+
this.streams.sticky.on("change", this.handleStickyUpdate);
90101
this.state = "ready";
91102
};
92103

@@ -95,7 +106,11 @@ class ClusterLink {
95106
};
96107

97108
handleInterestUpdate = (update: InterestUpdate) => {
98-
updateInterest(update, this.interest);
109+
updateInterest(update, this.interest, this.sticky);
110+
};
111+
112+
handleStickyUpdate = (update: StickyUpdate) => {
113+
updateSticky(update, this.sticky);
99114
};
100115

101116
private handleClientStateChanged = () => {
@@ -119,6 +134,7 @@ class ClusterLink {
119134
if (this.streams != null) {
120135
this.streams.interest.removeListener("change", this.handleInterestUpdate);
121136
this.streams.interest.close();
137+
this.streams.sticky.close();
122138
// @ts-ignore
123139
delete this.streams;
124140
}
@@ -162,9 +178,10 @@ class ClusterLink {
162178
return false;
163179
};
164180

165-
hash = (): { interest: number } => {
181+
hash = (): { interest: number; sticky: number } => {
166182
return {
167183
interest: hashInterest(this.interest),
184+
sticky: hashSticky(this.sticky),
168185
};
169186
};
170187
}
@@ -178,6 +195,7 @@ function clusterStreamNames({
178195
}) {
179196
return {
180197
interest: `cluster/${clusterName}/${id}/interest`,
198+
sticky: `cluster/${clusterName}/${id}/sticky`,
181199
};
182200
}
183201

@@ -207,6 +225,7 @@ export async function createClusterPersistServer({
207225

208226
export interface ClusterStreams {
209227
interest: DStream<InterestUpdate>;
228+
sticky: DStream<StickyUpdate>;
210229
}
211230

212231
export async function clusterStreams({
@@ -233,21 +252,27 @@ export async function clusterStreams({
233252
name: names.interest,
234253
...opts,
235254
});
255+
const sticky = await client.sync.dstream<StickyUpdate>({
256+
noInventory: true,
257+
name: names.sticky,
258+
...opts,
259+
});
236260
logger.debug("clusterStreams: got them", { clusterName });
237-
return { interest };
261+
return { interest, sticky };
238262
}
239263

240264
// Periodically delete not-necessary updates from the interest stream
241265
export async function trimClusterStreams(
242266
streams: ClusterStreams,
243267
data: {
244268
interest: Patterns<{ [queue: string]: Set<string> }>;
269+
sticky: { [pattern: string]: { [subject: string]: string } };
245270
links: { interest: Patterns<{ [queue: string]: Set<string> }> }[];
246271
},
247272
// don't delete anything that isn't at lest minAge ms old.
248273
minAge: number,
249-
): Promise<{ seqsInterest: number[] }> {
250-
const { interest } = streams;
274+
): Promise<{ seqsInterest: number[]; seqsSticky: number[] }> {
275+
const { interest, sticky } = streams;
251276
// First deal with interst
252277
// we iterate over the interest stream checking for subjects
253278
// with no current interest at all; in such cases it is safe
@@ -275,7 +300,45 @@ export async function trimClusterStreams(
275300
logger.debug("trimClusterStream: successfully trimmed interest", { seqs });
276301
}
277302

278-
return { seqsInterest: seqs };
303+
// Next deal with sticky -- trim ones where the pattern is no longer of interest.
304+
// There could be other reasons to trim but it gets much trickier. This one is more
305+
// obvious, except we have to check for any interest in the whole cluster, not
306+
// just this node.
307+
const seqs2: number[] = [];
308+
function noInterest(pattern: string) {
309+
if (data.interest.hasPattern(pattern)) {
310+
return false;
311+
}
312+
for (const link of data.links) {
313+
if (link.interest.hasPattern(pattern)) {
314+
return false;
315+
}
316+
}
317+
// nobody cares
318+
return true;
319+
}
320+
for (let n = 0; n < sticky.length; n++) {
321+
const time = sticky.time(n);
322+
if (time == null) continue;
323+
if (now - time.valueOf() <= minAge) {
324+
break;
325+
}
326+
const update = sticky.get(n) as StickyUpdate;
327+
if (noInterest(update.pattern)) {
328+
const seq = sticky.seq(n);
329+
if (seq != null) {
330+
seqs2.push(seq);
331+
}
332+
}
333+
}
334+
if (seqs2.length > 0) {
335+
// [ ] todo -- add to interest.delete a version where it takes an array of sequence numbers
336+
logger.debug("trimClusterStream: trimming sticky", { seqs2 });
337+
await sticky.delete({ seqs: seqs2 });
338+
logger.debug("trimClusterStream: successfully trimmed sticky", { seqs2 });
339+
}
340+
341+
return { seqsInterest: seqs, seqsSticky: seqs2 };
279342
}
280343

281344
function hashSet(X: Set<string>): number {
@@ -299,3 +362,15 @@ export function hashInterest(
299362
): number {
300363
return interest.hash(hashInterestValue);
301364
}
365+
366+
export function hashSticky(sticky: Sticky): number {
367+
let h = 0;
368+
for (const pattern in sticky) {
369+
h += hash_string(pattern);
370+
const x = sticky[pattern];
371+
for (const subject in x) {
372+
h += hash_string(x[subject]);
373+
}
374+
}
375+
return h;
376+
}

0 commit comments

Comments
 (0)