Skip to content

Commit e2ae3e3

Browse files
authored
[server] improved logging (#18697)
1 parent c23f106 commit e2ae3e3

File tree

6 files changed

+97
-65
lines changed

6 files changed

+97
-65
lines changed

components/server/src/jobs/runner.ts

Lines changed: 26 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import { WebhookEventGarbageCollector } from "./webhook-gc";
1818
import { WorkspaceGarbageCollector } from "./workspace-gc";
1919
import { SnapshotsJob } from "./snapshots";
2020
import { RelationshipUpdateJob } from "../authorization/relationship-updater-job";
21+
import { runWithContext } from "../util/log-context";
2122

2223
export const Job = Symbol("Job");
2324

@@ -78,29 +79,31 @@ export class JobRunner {
7879

7980
try {
8081
await this.mutex.using([job.name, ...(job.lockedResources || [])], job.frequencyMs, async (signal) => {
81-
log.info(`Acquired lock for job ${job.name}.`, logCtx);
82-
// we want to hold the lock for the entire duration of the job, so we return earliest after frequencyMs
83-
const timeout = new Promise<void>((resolve) => setTimeout(resolve, job.frequencyMs));
84-
const timer = jobsDurationSeconds.startTimer({ name: job.name });
85-
reportJobStarted(job.name);
86-
const now = new Date().getTime();
87-
try {
88-
await job.run();
89-
log.info(`Successfully finished job ${job.name}`, {
90-
...logCtx,
91-
jobTookSec: `${(new Date().getTime() - now) / 1000}s`,
92-
});
93-
reportJobCompleted(job.name, true);
94-
} catch (err) {
95-
log.error(`Error while running job ${job.name}`, err, {
96-
...logCtx,
97-
jobTookSec: `${(new Date().getTime() - now) / 1000}s`,
98-
});
99-
reportJobCompleted(job.name, false);
100-
} finally {
101-
jobsDurationSeconds.observe(timer());
102-
await timeout;
103-
}
82+
await runWithContext(job.name, {}, async () => {
83+
log.info(`Acquired lock for job ${job.name}.`, logCtx);
84+
// we want to hold the lock for the entire duration of the job, so we return earliest after frequencyMs
85+
const timeout = new Promise<void>((resolve) => setTimeout(resolve, job.frequencyMs));
86+
const timer = jobsDurationSeconds.startTimer({ name: job.name });
87+
reportJobStarted(job.name);
88+
const now = new Date().getTime();
89+
try {
90+
await job.run();
91+
log.info(`Successfully finished job ${job.name}`, {
92+
...logCtx,
93+
jobTookSec: `${(new Date().getTime() - now) / 1000}s`,
94+
});
95+
reportJobCompleted(job.name, true);
96+
} catch (err) {
97+
log.error(`Error while running job ${job.name}`, err, {
98+
...logCtx,
99+
jobTookSec: `${(new Date().getTime() - now) / 1000}s`,
100+
});
101+
reportJobCompleted(job.name, false);
102+
} finally {
103+
jobsDurationSeconds.observe(timer());
104+
await timeout;
105+
}
106+
});
104107
});
105108
} catch (err) {
106109
if (err instanceof ResourceLockedError || err instanceof ExecutionError) {

components/server/src/messaging/redis-subscriber.ts

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import {
2929
} from "../prometheus-metrics";
3030
import { Redis } from "ioredis";
3131
import { WorkspaceDB } from "@gitpod/gitpod-db/lib";
32+
import { runWithContext } from "../util/log-context";
3233

3334
const UNDEFINED_KEY = "undefined";
3435

@@ -54,18 +55,20 @@ export class RedisSubscriber {
5455
}
5556

5657
this.redis.on("message", async (channel: string, message: string) => {
57-
reportRedisUpdateReceived(channel);
58-
59-
let err: Error | undefined;
60-
try {
61-
await this.onMessage(channel, message);
62-
log.debug("[redis] Succesfully handled update", { channel, message });
63-
} catch (e) {
64-
err = e;
65-
log.error("[redis] Failed to handle message from Pub/Sub", e, { channel, message });
66-
} finally {
67-
reportRedisUpdateCompleted(channel, err);
68-
}
58+
await runWithContext("redis-subscriber", {}, async () => {
59+
reportRedisUpdateReceived(channel);
60+
61+
let err: Error | undefined;
62+
try {
63+
await this.onMessage(channel, message);
64+
log.debug("[redis] Succesfully handled update", { channel, message });
65+
} catch (e) {
66+
err = e;
67+
log.error("[redis] Failed to handle message from Pub/Sub", e, { channel, message });
68+
} finally {
69+
reportRedisUpdateCompleted(channel, err);
70+
}
71+
});
6972
});
7073
}
7174

components/server/src/server.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ import { GitHubEnterpriseApp } from "./prebuilds/github-enterprise-app";
4747
import { JobRunner } from "./jobs/runner";
4848
import { RedisSubscriber } from "./messaging/redis-subscriber";
4949
import { HEADLESS_LOGS_PATH_PREFIX, HEADLESS_LOG_DOWNLOAD_PATH_PREFIX } from "./workspace/headless-log-service";
50+
import { runWithContext } from "./util/log-context";
5051

5152
@injectable()
5253
export class Server {
@@ -138,6 +139,16 @@ export class Server {
138139
// Install passport
139140
await this.authenticator.init(app);
140141

142+
// log context
143+
app.use(async (req: express.Request, res: express.Response, next: express.NextFunction) => {
144+
try {
145+
const userId = req.user ? req.user.id : undefined;
146+
await runWithContext("http", { userId }, () => next());
147+
} catch (err) {
148+
next(err);
149+
}
150+
});
151+
141152
// Ensure that host contexts of dynamic auth providers are initialized.
142153
await this.hostCtxProvider.init();
143154

components/server/src/util/log-context.ts

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,24 +13,33 @@ import { v4 } from "uuid";
1313
type EnhancedLogContext = LogContext & {
1414
contextId?: string;
1515
contextTimeMs: number;
16+
contextKind: string;
1617
};
18+
1719
const asyncLocalStorage = new AsyncLocalStorage<EnhancedLogContext>();
1820
const augmenter: LogContext.Augmenter = (ctx) => {
1921
const globalContext = asyncLocalStorage.getStore();
20-
const contextTime = globalContext?.contextTimeMs ? Date.now() - globalContext.contextTimeMs : undefined;
21-
return {
22+
const contextTimeMs = globalContext?.contextTimeMs ? Date.now() - globalContext.contextTimeMs : undefined;
23+
const result = {
2224
...globalContext,
23-
contextTime,
25+
contextTimeMs,
2426
...ctx,
2527
};
28+
// if its an empty object return undefined
29+
return Object.keys(result).length === 0 ? undefined : result;
2630
};
2731
LogContext.setAugmenter(augmenter);
2832

29-
export async function runWithContext<T>(context: LogContext, fun: () => T): Promise<T> {
33+
export async function runWithContext<T>(
34+
contextKind: string,
35+
context: LogContext & { contextId?: string },
36+
fun: () => T,
37+
): Promise<T> {
3038
return asyncLocalStorage.run(
3139
{
3240
...context,
33-
contextId: v4(),
41+
contextKind,
42+
contextId: context.contextId || v4(),
3443
contextTimeMs: Date.now(),
3544
},
3645
fun,

components/server/src/websocket/websocket-connection-manager.ts

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -374,21 +374,33 @@ class GitpodJsonRpcProxyFactory<T extends object> extends JsonRpcProxyFactory<T>
374374
}
375375

376376
protected async onRequest(method: string, ...args: any[]): Promise<any> {
377+
const span = TraceContext.startSpan(method, undefined);
377378
const userId = this.clientMetadata.userId;
379+
const requestId = span.context().toTraceId();
378380
return runWithContext(
381+
"request",
379382
{
380383
userId,
384+
contextId: requestId,
381385
},
382386
() => {
383-
return this.internalOnRequest(method, ...args);
387+
try {
388+
return this.internalOnRequest(span, requestId, method, ...args);
389+
} finally {
390+
span.finish();
391+
}
384392
},
385393
);
386394
}
387395

388-
private async internalOnRequest(method: string, ...args: any[]): Promise<any> {
389-
const span = TraceContext.startSpan(method, undefined);
390-
const ctx = { span };
396+
private async internalOnRequest(
397+
span: opentracing.Span,
398+
requestId: string,
399+
method: string,
400+
...args: any[]
401+
): Promise<any> {
391402
const userId = this.clientMetadata.userId;
403+
const ctx = { span };
392404
const timer = apiCallDurationHistogram.startTimer();
393405
try {
394406
// generic tracing data
@@ -432,7 +444,7 @@ class GitpodJsonRpcProxyFactory<T extends object> extends JsonRpcProxyFactory<T>
432444
observeAPICallsDuration(method, 200, timer());
433445
return result;
434446
} catch (e) {
435-
const traceID = span.context().toTraceId();
447+
const requestIdMessage = ` If this error is unexpected, please quote the request ID '${requestId}' when reaching out to Gitpod Support.`;
436448
if (ApplicationError.hasErrorCode(e)) {
437449
increaseApiCallCounter(method, e.code);
438450
observeAPICallsDuration(method, e.code, timer());
@@ -449,13 +461,13 @@ class GitpodJsonRpcProxyFactory<T extends object> extends JsonRpcProxyFactory<T>
449461
message: e.message,
450462
},
451463
);
452-
throw new ResponseError(e.code, e.message, e.data);
464+
throw new ResponseError(e.code, e.message + requestIdMessage, e.data);
453465
} else {
454466
TraceContext.setError(ctx, e); // this is a "real" error
455467

456468
const err = new ApplicationError(
457469
ErrorCodes.INTERNAL_SERVER_ERROR,
458-
`Internal server error. Please quote trace ID: '${traceID}' when reaching to Gitpod Support`,
470+
`Internal server error: '${e.message}'` + requestIdMessage,
459471
);
460472
increaseApiCallCounter(method, err.code);
461473
observeAPICallsDuration(method, err.code, timer());
@@ -464,8 +476,6 @@ class GitpodJsonRpcProxyFactory<T extends object> extends JsonRpcProxyFactory<T>
464476
log.error({ userId }, `Request ${method} failed with internal server error`, e, { method, args });
465477
throw new ResponseError(ErrorCodes.INTERNAL_SERVER_ERROR, e.message);
466478
}
467-
} finally {
468-
span.finish();
469479
}
470480
}
471481

components/server/src/workspace/workspace-starter.ts

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -484,9 +484,13 @@ export class WorkspaceStarter {
484484
const span = TraceContext.startSpan("actuallyStartWorkspace", ctx);
485485
const region = instance.configuration.regionPreference;
486486
span.setTag("region_preference", region);
487-
log.info("Attempting to start workspace", {
488-
instanceID: instance.id,
489-
userID: user.id,
487+
const logCtx: LogContext = {
488+
instanceId: instance.id,
489+
userId: user.id,
490+
organizationId: workspace.organizationId,
491+
workspaceId: workspace.id,
492+
};
493+
log.info(logCtx, "Attempting to start workspace", {
490494
forceRebuild: forceRebuild,
491495
});
492496

@@ -524,18 +528,10 @@ export class WorkspaceStarter {
524528
const ideUrlPromise = new Deferred<string>();
525529
const before = Date.now();
526530
const logSuccess = (fromWsManager: boolean) => {
527-
log.info(
528-
{
529-
instanceId: instance.id,
530-
userId: workspace.ownerId,
531-
workspaceId: workspace.id,
532-
},
533-
"Received ideURL",
534-
{
535-
tookMs: Date.now() - before,
536-
fromWsManager,
537-
},
538-
);
531+
log.info(logCtx, "Received ideURL", {
532+
tookMs: Date.now() - before,
533+
fromWsManager,
534+
});
539535
};
540536

541537
const doStartWorkspace = async () => {
@@ -623,7 +619,7 @@ export class WorkspaceStarter {
623619
intervalHandle.dispose();
624620
}
625621
} catch (err) {
626-
this.logAndTraceStartWorkspaceError({ span }, { userId: user.id, instanceId: instance.id }, err);
622+
this.logAndTraceStartWorkspaceError({ span }, logCtx, err);
627623

628624
return { instanceID: instance.id };
629625
} finally {

0 commit comments

Comments
 (0)