From 0c1af2c59a0e7b67c5413b0c44554c0bc3da653b Mon Sep 17 00:00:00 2001 From: Gero Posmyk-Leinemann Date: Mon, 17 Mar 2025 09:08:43 +0000 Subject: [PATCH 1/9] [server] Introduce ReadinessController and probe at /ready Tool: gitpod/catfood.gitpod.cloud --- components/server/src/container-module.ts | 2 + .../src/liveness/readiness-controller.ts | 80 +++++++++++++++++++ components/server/src/server.ts | 3 + .../pkg/components/server/deployment.go | 72 +++++++++++------ 4 files changed, 132 insertions(+), 25 deletions(-) create mode 100644 components/server/src/liveness/readiness-controller.ts diff --git a/components/server/src/container-module.ts b/components/server/src/container-module.ts index 58d09ef485b4c5..73d07863871f67 100644 --- a/components/server/src/container-module.ts +++ b/components/server/src/container-module.ts @@ -71,6 +71,7 @@ import { WebhookEventGarbageCollector } from "./jobs/webhook-gc"; import { WorkspaceGarbageCollector } from "./jobs/workspace-gc"; import { LinkedInService } from "./linkedin-service"; import { LivenessController } from "./liveness/liveness-controller"; +import { ReadinessController } from "./liveness/readiness-controller"; import { RedisSubscriber } from "./messaging/redis-subscriber"; import { MonitoringEndpointsApp } from "./monitoring-endpoints"; import { OAuthController } from "./oauth-server/oauth-controller"; @@ -241,6 +242,7 @@ export const productionContainerModule = new ContainerModule( bind(WorkspaceDownloadService).toSelf().inSingletonScope(); bind(LivenessController).toSelf().inSingletonScope(); + bind(ReadinessController).toSelf().inSingletonScope(); bind(OneTimeSecretServer).toSelf().inSingletonScope(); diff --git a/components/server/src/liveness/readiness-controller.ts b/components/server/src/liveness/readiness-controller.ts new file mode 100644 index 00000000000000..70b628be7359da --- /dev/null +++ b/components/server/src/liveness/readiness-controller.ts @@ -0,0 +1,80 @@ +/** + * Copyright (c) 2025 Gitpod GmbH. All rights reserved. + * Licensed under the GNU Affero General Public License (AGPL). + * See License.AGPL.txt in the project root for license information. 
+ */ + +import { injectable, inject } from "inversify"; +import express from "express"; +import { TypeORM } from "@gitpod/gitpod-db/lib"; +import { SpiceDBClientProvider } from "../authorization/spicedb"; +import { log } from "@gitpod/gitpod-protocol/lib/util/logging"; +import { ReadSchemaRequest } from "@authzed/authzed-node/dist/src/v1"; + +@injectable() +export class ReadinessController { + @inject(TypeORM) protected readonly typeOrm: TypeORM; + @inject(SpiceDBClientProvider) protected readonly spiceDBClientProvider: SpiceDBClientProvider; + + get apiRouter(): express.Router { + const router = express.Router(); + this.addReadinessHandler(router); + return router; + } + + protected addReadinessHandler(router: express.Router) { + router.get("/", async (_, res) => { + try { + // Check database connection + const dbConnection = await this.checkDatabaseConnection(); + if (!dbConnection) { + log.warn("Readiness check failed: Database connection failed"); + res.status(503).send("Database connection failed"); + return; + } + + // Check SpiceDB connection + const spiceDBConnection = await this.checkSpiceDBConnection(); + if (!spiceDBConnection) { + log.warn("Readiness check failed: SpiceDB connection failed"); + res.status(503).send("SpiceDB connection failed"); + return; + } + + // Both connections are good + res.status(200).send("Ready"); + } catch (error) { + log.error("Readiness check failed", error); + res.status(503).send("Readiness check failed"); + } + }); + } + + private async checkDatabaseConnection(): Promise { + try { + const connection = await this.typeOrm.getConnection(); + // Simple query to verify connection is working + await connection.query("SELECT 1"); + return true; + } catch (error) { + log.error("Database connection check failed", error); + return false; + } + } + + private async checkSpiceDBConnection(): Promise { + try { + const client = this.spiceDBClientProvider.getClient(); + + // Send a request, to verify that the connection works + const req = ReadSchemaRequest.create({}); + const response = await client.readSchema(req); + log.debug("SpiceDB connection check successful", { schemaLength: response.schemaText.length }); + + return true; + } catch (error) { + log.error("SpiceDB connection check failed", error); + return false; + } + } +} diff --git a/components/server/src/server.ts b/components/server/src/server.ts index 57facbb12560ab..e84bb3b7db8921 100644 --- a/components/server/src/server.ts +++ b/components/server/src/server.ts @@ -37,6 +37,7 @@ import { Config } from "./config"; import { DebugApp } from "@gitpod/gitpod-protocol/lib/util/debug-app"; import { WsConnectionHandler } from "./express/ws-connection-handler"; import { LivenessController } from "./liveness/liveness-controller"; +import { ReadinessController } from "./liveness/readiness-controller"; import { IamSessionApp } from "./iam/iam-session-app"; import { API } from "./api/server"; import { GithubApp } from "./prebuilds/github-app"; @@ -80,6 +81,7 @@ export class Server { @inject(WebsocketConnectionManager) private readonly websocketConnectionHandler: WebsocketConnectionManager, @inject(WorkspaceDownloadService) private readonly workspaceDownloadService: WorkspaceDownloadService, @inject(LivenessController) private readonly livenessController: LivenessController, + @inject(ReadinessController) private readonly readinessController: ReadinessController, @inject(MonitoringEndpointsApp) private readonly monitoringEndpointsApp: MonitoringEndpointsApp, @inject(CodeSyncService) private readonly 
codeSyncService: CodeSyncService, @inject(HeadlessLogController) private readonly headlessLogController: HeadlessLogController, @@ -320,6 +322,7 @@ export class Server { app.use(this.oneTimeSecretServer.apiRouter); app.use(this.newsletterSubscriptionController.apiRouter); app.use("/live", this.livenessController.apiRouter); + app.use("/ready", this.readinessController.apiRouter); app.use("/version", (req: express.Request, res: express.Response, next: express.NextFunction) => { res.send(this.config.version); }); diff --git a/install/installer/pkg/components/server/deployment.go b/install/installer/pkg/components/server/deployment.go index ad973c6143ed00..f80514df7bc596 100644 --- a/install/installer/pkg/components/server/deployment.go +++ b/install/installer/pkg/components/server/deployment.go @@ -371,35 +371,57 @@ func deployment(ctx *common.RenderContext) ([]runtime.Object, error) { PeriodSeconds: 10, FailureThreshold: 6, }, + ReadinessProbe: &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/ready", + Port: intstr.IntOrString{ + Type: intstr.Int, + IntVal: ContainerPort, + }, + }, + }, + InitialDelaySeconds: 5, + PeriodSeconds: 10, + FailureThreshold: 12, // try for 120 seconds, then re-start the container + }, SecurityContext: &corev1.SecurityContext{ Privileged: pointer.Bool(false), AllowPrivilegeEscalation: pointer.Bool(false), }, - Ports: []corev1.ContainerPort{{ - Name: ContainerPortName, - ContainerPort: ContainerPort, - }, { - Name: baseserver.BuiltinMetricsPortName, - ContainerPort: baseserver.BuiltinMetricsPort, - }, { - Name: InstallationAdminName, - ContainerPort: InstallationAdminPort, - }, { - Name: IAMSessionPortName, - ContainerPort: IAMSessionPort, - }, { - Name: DebugPortName, - ContainerPort: baseserver.BuiltinDebugPort, - }, { - Name: DebugNodePortName, - ContainerPort: common.DebugNodePort, - }, { - Name: GRPCAPIName, - ContainerPort: GRPCAPIPort, - }, { - Name: PublicAPIName, - ContainerPort: PublicAPIPort, - }, + Ports: []corev1.ContainerPort{ + { + Name: ContainerPortName, + ContainerPort: ContainerPort, + }, + { + Name: baseserver.BuiltinMetricsPortName, + ContainerPort: baseserver.BuiltinMetricsPort, + }, + { + Name: InstallationAdminName, + ContainerPort: InstallationAdminPort, + }, + { + Name: IAMSessionPortName, + ContainerPort: IAMSessionPort, + }, + { + Name: DebugPortName, + ContainerPort: baseserver.BuiltinDebugPort, + }, + { + Name: DebugNodePortName, + ContainerPort: common.DebugNodePort, + }, + { + Name: GRPCAPIName, + ContainerPort: GRPCAPIPort, + }, + { + Name: PublicAPIName, + ContainerPort: PublicAPIPort, + }, }, // todo(sje): do we need to cater for serverContainer.env from values.yaml? 
Env: common.CustomizeEnvvar(ctx, Component, env), From aedcdd7612491dee181b151badc793cf3d9b2552 Mon Sep 17 00:00:00 2001 From: Gero Posmyk-Leinemann Date: Mon, 17 Mar 2025 09:25:26 +0000 Subject: [PATCH 2/9] [server] Move /live and /ready endpoints to a separate express app and port Tool: gitpod/catfood.gitpod.cloud --- components/server/src/container-module.ts | 3 +++ components/server/src/liveness/probes.ts | 25 +++++++++++++++++ components/server/src/server.ts | 27 +++++++++++++------ .../pkg/components/server/constants.go | 2 ++ .../pkg/components/server/deployment.go | 8 ++++-- 5 files changed, 55 insertions(+), 10 deletions(-) create mode 100644 components/server/src/liveness/probes.ts diff --git a/components/server/src/container-module.ts b/components/server/src/container-module.ts index 73d07863871f67..0a1af2352bc012 100644 --- a/components/server/src/container-module.ts +++ b/components/server/src/container-module.ts @@ -136,6 +136,7 @@ import { AnalyticsController } from "./analytics-controller"; import { InstallationAdminCleanup } from "./jobs/installation-admin-cleanup"; import { AuditLogService } from "./audit/AuditLogService"; import { AuditLogGarbageCollectorJob } from "./jobs/auditlog-gc"; +import { ProbesApp } from "./liveness/probes"; export const productionContainerModule = new ContainerModule( (bind, unbind, isBound, rebind, unbindAsync, onActivation, onDeactivation) => { @@ -241,6 +242,8 @@ export const productionContainerModule = new ContainerModule( bind(IWorkspaceManagerClientCallMetrics).toService(IClientCallMetrics); bind(WorkspaceDownloadService).toSelf().inSingletonScope(); + + bind(ProbesApp).toSelf().inSingletonScope(); bind(LivenessController).toSelf().inSingletonScope(); bind(ReadinessController).toSelf().inSingletonScope(); diff --git a/components/server/src/liveness/probes.ts b/components/server/src/liveness/probes.ts new file mode 100644 index 00000000000000..160c02de5af4c3 --- /dev/null +++ b/components/server/src/liveness/probes.ts @@ -0,0 +1,25 @@ +/** + * Copyright (c) 2025 Gitpod GmbH. All rights reserved. + * Licensed under the GNU Affero General Public License (AGPL). + * See License.AGPL.txt in the project root for license information. 
+ */ + +import express from "express"; +import { inject, injectable } from "inversify"; +import { LivenessController } from "./liveness-controller"; +import { ReadinessController } from "./readiness-controller"; + +@injectable() +export class ProbesApp { + constructor( + @inject(LivenessController) protected readonly livenessController: LivenessController, + @inject(ReadinessController) protected readonly readinessController: ReadinessController, + ) {} + + public create(): express.Application { + const probesApp = express(); + probesApp.use("/live", this.livenessController.apiRouter); + probesApp.use("/ready", this.readinessController.apiRouter); + return probesApp; + } +} diff --git a/components/server/src/server.ts b/components/server/src/server.ts index e84bb3b7db8921..8371e6111c1a6e 100644 --- a/components/server/src/server.ts +++ b/components/server/src/server.ts @@ -36,8 +36,6 @@ import { NewsletterSubscriptionController } from "./user/newsletter-subscription import { Config } from "./config"; import { DebugApp } from "@gitpod/gitpod-protocol/lib/util/debug-app"; import { WsConnectionHandler } from "./express/ws-connection-handler"; -import { LivenessController } from "./liveness/liveness-controller"; -import { ReadinessController } from "./liveness/readiness-controller"; import { IamSessionApp } from "./iam/iam-session-app"; import { API } from "./api/server"; import { GithubApp } from "./prebuilds/github-app"; @@ -54,6 +52,11 @@ import { } from "./workspace/headless-log-service"; import { runWithRequestContext } from "./util/request-context"; import { AnalyticsController } from "./analytics-controller"; +import { ProbesApp as ProbesAppProvider } from "./liveness/probes"; + +const MONITORING_PORT = 9500; +const IAM_SESSION_PORT = 9876; +const PROBES_PORT = 9400; @injectable() export class Server { @@ -66,6 +69,8 @@ export class Server { protected privateApiServer?: http.Server; protected readonly eventEmitter = new EventEmitter(); + protected probesApp: express.Application; + protected probesServer?: http.Server; protected app?: express.Application; protected httpServer?: http.Server; protected monitoringApp?: express.Application; @@ -80,8 +85,6 @@ export class Server { @inject(UserController) private readonly userController: UserController, @inject(WebsocketConnectionManager) private readonly websocketConnectionHandler: WebsocketConnectionManager, @inject(WorkspaceDownloadService) private readonly workspaceDownloadService: WorkspaceDownloadService, - @inject(LivenessController) private readonly livenessController: LivenessController, - @inject(ReadinessController) private readonly readinessController: ReadinessController, @inject(MonitoringEndpointsApp) private readonly monitoringEndpointsApp: MonitoringEndpointsApp, @inject(CodeSyncService) private readonly codeSyncService: CodeSyncService, @inject(HeadlessLogController) private readonly headlessLogController: HeadlessLogController, @@ -102,6 +105,7 @@ export class Server { @inject(API) private readonly api: API, @inject(RedisSubscriber) private readonly redisSubscriber: RedisSubscriber, @inject(AnalyticsController) private readonly analyticsController: AnalyticsController, + @inject(ProbesAppProvider) private readonly probesAppProvider: ProbesAppProvider, ) {} public async init(app: express.Application) { @@ -115,6 +119,9 @@ export class Server { await this.typeOrm.connect(); log.info("connected to DB"); + // probes + this.probesApp = this.probesAppProvider.create(); + // metrics app.use((req: express.Request, res: 
express.Response, next: express.NextFunction) => { const startTime = Date.now(); @@ -321,8 +328,6 @@ export class Server { // Authorization: none app.use(this.oneTimeSecretServer.apiRouter); app.use(this.newsletterSubscriptionController.apiRouter); - app.use("/live", this.livenessController.apiRouter); - app.use("/ready", this.readinessController.apiRouter); app.use("/version", (req: express.Request, res: express.Response, next: express.NextFunction) => { res.send(this.config.version); }); @@ -354,6 +359,11 @@ export class Server { throw new Error("server cannot start, not initialized"); } + const probeServer = this.probesApp.listen(PROBES_PORT, () => { + log.info(`probes server listening on port: ${(probeServer.address()).port}`); + }); + this.probesServer = probeServer; + const httpServer = this.app.listen(port, () => { this.eventEmitter.emit(Server.EVENT_ON_START, httpServer); log.info(`server listening on port: ${(httpServer.address()).port}`); @@ -361,7 +371,7 @@ export class Server { this.httpServer = httpServer; if (this.monitoringApp) { - this.monitoringHttpServer = this.monitoringApp.listen(9500, "localhost", () => { + this.monitoringHttpServer = this.monitoringApp.listen(MONITORING_PORT, "localhost", () => { log.info( `monitoring app listening on port: ${(this.monitoringHttpServer!.address()).port}`, ); @@ -369,7 +379,7 @@ export class Server { } if (this.iamSessionApp) { - this.iamSessionAppServer = this.iamSessionApp.listen(9876, () => { + this.iamSessionAppServer = this.iamSessionApp.listen(IAM_SESSION_PORT, () => { log.info( `IAM session server listening on port: ${(this.iamSessionAppServer!.address()).port}`, ); @@ -409,6 +419,7 @@ export class Server { race(this.stopServer(this.httpServer), "stop httpserver"), race(this.stopServer(this.privateApiServer), "stop private api server"), race(this.stopServer(this.publicApiServer), "stop public api server"), + race(this.stopServer(this.probesServer), "stop probe server"), race((async () => this.disposables.dispose())(), "dispose disposables"), ]); diff --git a/install/installer/pkg/components/server/constants.go b/install/installer/pkg/components/server/constants.go index 8e024ada3ad2f9..32e4ffd740ce5a 100644 --- a/install/installer/pkg/components/server/constants.go +++ b/install/installer/pkg/components/server/constants.go @@ -25,6 +25,8 @@ const ( DebugNodePortName = "debugnode" ServicePort = 3000 personalAccessTokenSigningKeyMountPath = "/secrets/personal-access-token-signing-key" + ProbesPort = 9400 + ProbesPortName = "probes" AdminCredentialsSecretName = "admin-credentials" AdminCredentialsSecretMountPath = "/credentials/admin" diff --git a/install/installer/pkg/components/server/deployment.go b/install/installer/pkg/components/server/deployment.go index f80514df7bc596..07dd134c85fc11 100644 --- a/install/installer/pkg/components/server/deployment.go +++ b/install/installer/pkg/components/server/deployment.go @@ -363,7 +363,7 @@ func deployment(ctx *common.RenderContext) ([]runtime.Object, error) { Path: "/live", Port: intstr.IntOrString{ Type: intstr.Int, - IntVal: ContainerPort, + IntVal: ProbesPort, }, }, }, @@ -377,7 +377,7 @@ func deployment(ctx *common.RenderContext) ([]runtime.Object, error) { Path: "/ready", Port: intstr.IntOrString{ Type: intstr.Int, - IntVal: ContainerPort, + IntVal: ProbesPort, }, }, }, @@ -422,6 +422,10 @@ func deployment(ctx *common.RenderContext) ([]runtime.Object, error) { Name: PublicAPIName, ContainerPort: PublicAPIPort, }, + { + Name: ProbesPortName, + ContainerPort: ProbesPort, + }, }, // 
todo(sje): do we need to cater for serverContainer.env from values.yaml? Env: common.CustomizeEnvvar(ctx, Component, env), From 5a57076acf2811e87848e7a5108b02f33441975a Mon Sep 17 00:00:00 2001 From: Gero Posmyk-Leinemann Date: Mon, 17 Mar 2025 09:27:56 +0000 Subject: [PATCH 3/9] [memory-bank] task-related learnings Tool: gitpod/catfood.gitpod.cloud --- memory-bank/activeContext.md | 16 +++ memory-bank/progress.md | 12 +++ memory-bank/systemPatterns.md | 2 + prd/001-readinessprobe-server.md | 168 +++++++++++++++++++++++++++++++ 4 files changed, 198 insertions(+) create mode 100644 prd/001-readinessprobe-server.md diff --git a/memory-bank/activeContext.md b/memory-bank/activeContext.md index cc91a0431e36a3..1a6d1034a7255b 100644 --- a/memory-bank/activeContext.md +++ b/memory-bank/activeContext.md @@ -144,5 +144,21 @@ Initial exploration of the Gitpod codebase has revealed: - Works by copying relevant sources into a separate file tree - Can also be run from inside the workspace - Manages complex dependencies between components +- **Server Health Checks**: The Gitpod server uses two distinct health check mechanisms: + - **Liveness Probe**: Checks the event loop lag to determine if the server is functioning properly + - **Readiness Probe**: Checks database and SpiceDB connectivity to ensure the server is ready to handle requests +- **Critical Dependencies**: The server has critical external dependencies that must be operational: + - **Database (TypeORM)**: Used for persistent storage + - **SpiceDB**: Used for authorization and permission management +- **Server Architecture Patterns**: + - The server uses dependency injection (Inversify) for component management + - Components are registered in `container-module.ts` and injected where needed + - The server exposes HTTP endpoints for health checks and other functionality + - Routes are registered in the `registerRoutes` method in `server.ts` + - New functionality typically requires: + 1. Creating a new controller/service class + 2. Registering it in the container module + 3. Injecting it where needed + 4. Updating any relevant configuration files This section will be continuously updated as new insights are gained through working with the system. diff --git a/memory-bank/progress.md b/memory-bank/progress.md index 266640c698f8fc..343713e6e1d6ce 100644 --- a/memory-bank/progress.md +++ b/memory-bank/progress.md @@ -111,6 +111,7 @@ As we begin working with the codebase, we have not yet identified specific issue - Documentation of second set of API components (image-builder-api, local-app-api, registry-facade-api) - Documentation of third set of API components (supervisor-api, usage-api, ws-daemon-api) - Documentation of fourth set of API components (ws-manager-api, ws-manager-bridge-api) +- First feature implementation: Server readiness probe for database and SpiceDB connectivity ### Upcoming Milestones - Development environment setup @@ -204,6 +205,17 @@ No specific blockers or dependencies have been identified yet. 
This section will - ws-manager-api: Interfaces for managing the lifecycle of workspaces in Kubernetes clusters - ws-manager-bridge-api: Interfaces for dynamic management of workspace clusters +- **3/17/2025**: + - Implemented server readiness probe: + - Created ReadinessController to check database and SpiceDB connectivity + - Updated server container module and route configuration + - Created PRD document for the readiness probe implementation + - Updated Kubernetes deployment configuration to add the readiness probe + - Updated memory bank with new learnings: + - Added information about server health checks and critical dependencies + - Documented server architecture patterns and dependency injection + - Added information about Kubernetes deployment configuration + ## Next Evaluation Point The next evaluation of progress will occur after: diff --git a/memory-bank/systemPatterns.md b/memory-bank/systemPatterns.md index 7fe8b7263c5933..f2665362e216f5 100644 --- a/memory-bank/systemPatterns.md +++ b/memory-bank/systemPatterns.md @@ -129,3 +129,5 @@ Workspaces are treated as immutable, with changes to configuration resulting in 6. **gRPC Communication**: Internal services communicate using gRPC for efficient, typed communication. 7. **Leeway Build System**: Custom build system for managing the complex dependencies between components. + +8. **Kubernetes Deployment Configuration**: All code that defines Kubernetes objects for deployable components lives in `install/installer`. This centralized approach ensures consistent deployment patterns across all components. diff --git a/prd/001-readinessprobe-server.md b/prd/001-readinessprobe-server.md new file mode 100644 index 00000000000000..fd481ef9d22d20 --- /dev/null +++ b/prd/001-readinessprobe-server.md @@ -0,0 +1,168 @@ +# Server Readiness Probe PRD + +## Overview + +This document outlines the implementation of a readiness probe for the Gitpod server deployment. The readiness probe will ensure that the server is only considered ready when it has established connections to both the database and SpiceDB authorizer. + +## Background + +Currently, the server deployment has a liveness probe that checks the event loop lag, but it does not have a readiness probe. This means that the server is considered ready to receive traffic as soon as the container starts, even if it hasn't established connections to critical dependencies like the database and SpiceDB. + +## Requirements + +1. Create a readiness endpoint in the server that checks: + - Database connectivity + - SpiceDB authorizer connectivity +2. Configure the Kubernetes deployment to use this endpoint as a readiness probe + +## Implementation Details + +### 1. 
Readiness Controller + +We've created a new `ReadinessController` class in `components/server/src/liveness/readiness-controller.ts` that: +- Checks database connectivity by executing a simple query +- Checks SpiceDB connectivity by attempting to get a client +- Returns a 200 status code only if both checks pass, otherwise returns a 503 status code + +```typescript +// components/server/src/liveness/readiness-controller.ts +import { injectable, inject } from "inversify"; +import express from "express"; +import { TypeORM } from "@gitpod/gitpod-db/lib"; +import { SpiceDBClientProvider } from "../authorization/spicedb"; +import { log } from "@gitpod/gitpod-protocol/lib/util/logging"; + +@injectable() +export class ReadinessController { + @inject(TypeORM) protected readonly typeOrm: TypeORM; + @inject(SpiceDBClientProvider) protected readonly spiceDBClientProvider: SpiceDBClientProvider; + + get apiRouter(): express.Router { + const router = express.Router(); + this.addReadinessHandler(router); + return router; + } + + protected addReadinessHandler(router: express.Router) { + router.get("/", async (_, res) => { + try { + // Check database connection + const dbConnection = await this.checkDatabaseConnection(); + if (!dbConnection) { + log.warn("Readiness check failed: Database connection failed"); + res.status(503).send("Database connection failed"); + return; + } + + // Check SpiceDB connection + const spiceDBConnection = await this.checkSpiceDBConnection(); + if (!spiceDBConnection) { + log.warn("Readiness check failed: SpiceDB connection failed"); + res.status(503).send("SpiceDB connection failed"); + return; + } + + // Both connections are good + res.status(200).send("Ready"); + } catch (error) { + log.error("Readiness check failed", error); + res.status(503).send("Readiness check failed"); + } + }); + } + + private async checkDatabaseConnection(): Promise { + try { + const connection = await this.typeOrm.getConnection(); + // Simple query to verify connection is working + await connection.query("SELECT 1"); + return true; + } catch (error) { + log.error("Database connection check failed", error); + return false; + } + } + + private async checkSpiceDBConnection(): Promise { + try { + // Just getting the client without error is a basic check + // If the client is not available, getClient() will throw an error + this.spiceDBClientProvider.getClient(); + return true; + } catch (error) { + log.error("SpiceDB connection check failed", error); + return false; + } + } +} +``` + +### 2. Server Configuration + +We've updated the server's container module to include the `ReadinessController`: + +```typescript +// In container-module.ts +import { ReadinessController } from "./liveness/readiness-controller"; + +// In the productionContainerModule +bind(ReadinessController).toSelf().inSingletonScope(); +``` + +We've also updated the server's route configuration to register the readiness endpoint: + +```typescript +// In server.ts +import { ReadinessController } from "./liveness/readiness-controller"; + +// In the constructor +@inject(ReadinessController) private readonly readinessController: ReadinessController, + +// In registerRoutes method +app.use("/ready", this.readinessController.apiRouter); +``` + +### 3. 
Kubernetes Deployment + +We need to update the server deployment in `install/installer/pkg/components/server/deployment.go` to add a readiness probe: + +```go +ReadinessProbe: &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/ready", + Port: intstr.IntOrString{ + Type: intstr.Int, + IntVal: ContainerPort, + }, + }, + }, + InitialDelaySeconds: 30, + PeriodSeconds: 10, + FailureThreshold: 3, +}, +``` + +## Testing + +The readiness probe should be tested to ensure: + +1. The server is only considered ready when both database and SpiceDB connections are established +2. The server is not considered ready if either connection fails +3. The server becomes ready again when connections are re-established + +## Deployment Considerations + +- The readiness probe has an initial delay of 30 seconds to allow the server time to establish connections +- The probe runs every 10 seconds +- The probe allows up to 3 failures before marking the pod as not ready + +## Future Improvements + +- Add more sophisticated checks for SpiceDB connectivity, such as a simple permission check +- Add metrics for readiness probe failures +- Consider adding more dependencies to the readiness check as needed + +## Conclusion + +This implementation ensures that the server is only considered ready when it has established connections to both the database and SpiceDB authorizer. This improves the reliability of the deployment by preventing traffic from being sent to instances that are not fully initialized. From f417382b2b2eae8f631e2024a1ca95fe0930cf7f Mon Sep 17 00:00:00 2001 From: Gero Posmyk-Leinemann Date: Mon, 17 Mar 2025 10:04:30 +0000 Subject: [PATCH 4/9] [server] Introduce `server_readiness_probe` feature flag so we can disable the ReadinessProbe if required Tool: gitpod/catfood.gitpod.cloud --- .../server/src/liveness/readiness-controller.ts | 14 ++++++++++++++ memory-bank/activeContext.md | 1 + memory-bank/progress.md | 2 ++ prd/001-readinessprobe-server.md | 13 +++++++++++++ 4 files changed, 30 insertions(+) diff --git a/components/server/src/liveness/readiness-controller.ts b/components/server/src/liveness/readiness-controller.ts index 70b628be7359da..2f48fffd1adf9c 100644 --- a/components/server/src/liveness/readiness-controller.ts +++ b/components/server/src/liveness/readiness-controller.ts @@ -10,6 +10,7 @@ import { TypeORM } from "@gitpod/gitpod-db/lib"; import { SpiceDBClientProvider } from "../authorization/spicedb"; import { log } from "@gitpod/gitpod-protocol/lib/util/logging"; import { ReadSchemaRequest } from "@authzed/authzed-node/dist/src/v1"; +import { getExperimentsClientForBackend } from "@gitpod/gitpod-protocol/lib/experiments/configcat-server"; @injectable() export class ReadinessController { @@ -25,6 +26,19 @@ export class ReadinessController { protected addReadinessHandler(router: express.Router) { router.get("/", async (_, res) => { try { + // Check feature flag first + const readinessProbeEnabled = await getExperimentsClientForBackend().getValueAsync( + "server_readiness_probe", + true, // Default to readiness probe, skip if false + {}, + ); + + if (!readinessProbeEnabled) { + log.debug("Readiness check skipped due to feature flag"); + res.status(200); + return; + } + // Check database connection const dbConnection = await this.checkDatabaseConnection(); if (!dbConnection) { diff --git a/memory-bank/activeContext.md b/memory-bank/activeContext.md index 1a6d1034a7255b..5b374bf07fffa1 100644 --- a/memory-bank/activeContext.md +++ 
b/memory-bank/activeContext.md @@ -147,6 +147,7 @@ Initial exploration of the Gitpod codebase has revealed: - **Server Health Checks**: The Gitpod server uses two distinct health check mechanisms: - **Liveness Probe**: Checks the event loop lag to determine if the server is functioning properly - **Readiness Probe**: Checks database and SpiceDB connectivity to ensure the server is ready to handle requests + - Controlled by a ConfigCat feature flag `server_readiness_probe` (default: true) that can bypass the actual checks - **Critical Dependencies**: The server has critical external dependencies that must be operational: - **Database (TypeORM)**: Used for persistent storage - **SpiceDB**: Used for authorization and permission management diff --git a/memory-bank/progress.md b/memory-bank/progress.md index 343713e6e1d6ce..a07ce4803b2ccb 100644 --- a/memory-bank/progress.md +++ b/memory-bank/progress.md @@ -211,10 +211,12 @@ No specific blockers or dependencies have been identified yet. This section will - Updated server container module and route configuration - Created PRD document for the readiness probe implementation - Updated Kubernetes deployment configuration to add the readiness probe + - Added ConfigCat feature flag `server_readiness_probe` to control readiness checks - Updated memory bank with new learnings: - Added information about server health checks and critical dependencies - Documented server architecture patterns and dependency injection - Added information about Kubernetes deployment configuration + - Documented feature flag implementation for readiness probe ## Next Evaluation Point diff --git a/prd/001-readinessprobe-server.md b/prd/001-readinessprobe-server.md index fd481ef9d22d20..8ff423a43b3714 100644 --- a/prd/001-readinessprobe-server.md +++ b/prd/001-readinessprobe-server.md @@ -157,6 +157,19 @@ The readiness probe should be tested to ensure: - The probe runs every 10 seconds - The probe allows up to 3 failures before marking the pod as not ready +## Feature Flag Control + +The readiness probe implementation includes a ConfigCat feature flag called `server_readiness_probe` that controls whether the actual connectivity checks are performed: + +- When the flag is set to `true` (default): The readiness probe will always return a 200 status code, bypassing the actual database and SpiceDB connectivity checks +- When the flag is set to `false`: The readiness probe will perform the actual checks and return the appropriate status code based on the results + +This feature flag provides several benefits: + +1. **Gradual Rollout**: Allows for a gradual rollout of the readiness probe, which is useful for testing in production environments +2. **Emergency Override**: If the readiness probe causes issues in production, the flag can be quickly toggled to bypass the checks without requiring a code deployment +3. 
**Environment-Specific Configuration**: Different environments (dev, staging, production) can have different settings for the readiness probe + ## Future Improvements - Add more sophisticated checks for SpiceDB connectivity, such as a simple permission check From de921c871b97534abdb38283fed3d7dc29d56e90 Mon Sep 17 00:00:00 2001 From: Gero Posmyk-Leinemann Date: Mon, 17 Mar 2025 10:30:23 +0000 Subject: [PATCH 5/9] docs: formalize Product Requirements Document workflow - Add PRD workflow to systemPatterns.md as a standardized development process - Update .clinerules with instructions to follow the PRD workflow - Update activeContext.md and progress.md to reference the new workflow This formalizes the process we used for implementing the server readiness probe feature. Tool: gitpod/catfood.gitpod.cloud --- memory-bank/.clinerules | 9 +++++++++ memory-bank/activeContext.md | 3 +++ memory-bank/progress.md | 4 ++++ memory-bank/systemPatterns.md | 14 ++++++++++++++ 4 files changed, 30 insertions(+) diff --git a/memory-bank/.clinerules b/memory-bank/.clinerules index 599a0c9ec29b52..b4c1a7b5c25a01 100644 --- a/memory-bank/.clinerules +++ b/memory-bank/.clinerules @@ -26,6 +26,15 @@ This file captures important patterns, preferences, and project intelligence tha ## Development Workflow +- **Feature Development Process**: + - Follow the Product Requirements Document (PRD) workflow documented in systemPatterns.md under "Development Workflows" + - Create PRD documents in the `prd/` directory with naming convention `NNN-featurename.md` + - Include standard sections: Overview, Background, Requirements, Implementation Details, Testing, Deployment Considerations, Future Improvements + - Use Plan Mode for requirements gathering and planning + - Use Act Mode for implementation and documentation updates + - Always update memory bank with new knowledge gained during implementation + - Reference the PRD in commit messages and documentation + - **Build Approaches**: - **In-tree builds** (primary for local development): - TypeScript components: Use `yarn` commands defined in package.json diff --git a/memory-bank/activeContext.md b/memory-bank/activeContext.md index 5b374bf07fffa1..e35aaefb63c77e 100644 --- a/memory-bank/activeContext.md +++ b/memory-bank/activeContext.md @@ -10,10 +10,13 @@ Key areas of focus include: 2. **Component Documentation**: Creating detailed documentation for each component 3. **Development Workflow**: Understanding how to effectively develop and test changes 4. **Documentation**: Maintaining a comprehensive memory bank for future reference +5. **Standardized Workflows**: Establishing and documenting standardized workflows for development activities ## Recent Changes - Created the initial memory bank structure with core files +- Established standardized workflows for development activities: + - Product Requirements Document (PRD) workflow for feature development - Added a components subdirectory to the memory bank - Created detailed documentation for key components: - blobserve: Service that provides static assets from OCI images diff --git a/memory-bank/progress.md b/memory-bank/progress.md index a07ce4803b2ccb..c5bb9c89104133 100644 --- a/memory-bank/progress.md +++ b/memory-bank/progress.md @@ -217,6 +217,10 @@ No specific blockers or dependencies have been identified yet. 
This section will - Documented server architecture patterns and dependency injection - Added information about Kubernetes deployment configuration - Documented feature flag implementation for readiness probe + - Established standardized development workflows: + - Created `workflows.md` to document standardized development processes + - Documented the Product Requirements Document (PRD) workflow + - Updated `activeContext.md` to reference the new workflows ## Next Evaluation Point diff --git a/memory-bank/systemPatterns.md b/memory-bank/systemPatterns.md index f2665362e216f5..fc23decbcf0192 100644 --- a/memory-bank/systemPatterns.md +++ b/memory-bank/systemPatterns.md @@ -131,3 +131,17 @@ Workspaces are treated as immutable, with changes to configuration resulting in 7. **Leeway Build System**: Custom build system for managing the complex dependencies between components. 8. **Kubernetes Deployment Configuration**: All code that defines Kubernetes objects for deployable components lives in `install/installer`. This centralized approach ensures consistent deployment patterns across all components. + +## Development Workflows + +### Product Requirements Document (PRD) Workflow +Gitpod uses a structured PRD workflow for feature development to ensure proper planning, documentation, and implementation: + +1. **Requirements Gathering** (Plan Mode): Understand the problem, explore existing components, gather information +2. **PRD Creation** (Plan Mode): Create a detailed document in `prd/` with standardized sections +3. **Implementation Planning** (Plan Mode): Identify files to modify and plan the approach +4. **Implementation** (Act Mode): Create/modify necessary files following the plan +5. **Documentation Update** (Act Mode): Update memory bank with new knowledge +6. **Verification**: Ensure implementation meets requirements and documentation is complete + +This workflow ensures thorough planning, clear documentation, and knowledge preservation for all feature development. 
From 5e4a03354a41aa739ebef75a797e745e67810a42 Mon Sep 17 00:00:00 2001 From: Gero Posmyk-Leinemann Date: Mon, 17 Mar 2025 10:54:20 +0000 Subject: [PATCH 6/9] [server] ReadinessProbe: add redis as dependency Tool: gitpod/catfood.gitpod.cloud --- .../src/liveness/readiness-controller.ts | 24 ++++++++++++++++++- memory-bank/activeContext.md | 3 ++- memory-bank/progress.md | 5 ++++ prd/001-readinessprobe-server.md | 15 +++++++----- 4 files changed, 39 insertions(+), 8 deletions(-) diff --git a/components/server/src/liveness/readiness-controller.ts b/components/server/src/liveness/readiness-controller.ts index 2f48fffd1adf9c..f752625d0d4ceb 100644 --- a/components/server/src/liveness/readiness-controller.ts +++ b/components/server/src/liveness/readiness-controller.ts @@ -11,11 +11,13 @@ import { SpiceDBClientProvider } from "../authorization/spicedb"; import { log } from "@gitpod/gitpod-protocol/lib/util/logging"; import { ReadSchemaRequest } from "@authzed/authzed-node/dist/src/v1"; import { getExperimentsClientForBackend } from "@gitpod/gitpod-protocol/lib/experiments/configcat-server"; +import { Redis } from "ioredis"; @injectable() export class ReadinessController { @inject(TypeORM) protected readonly typeOrm: TypeORM; @inject(SpiceDBClientProvider) protected readonly spiceDBClientProvider: SpiceDBClientProvider; + @inject(Redis) protected readonly redis: Redis; get apiRouter(): express.Router { const router = express.Router(); @@ -55,7 +57,15 @@ export class ReadinessController { return; } - // Both connections are good + // Check Redis connection + const redisConnection = await this.checkRedisConnection(); + if (!redisConnection) { + log.warn("Readiness check failed: Redis connection failed"); + res.status(503).send("Redis connection failed"); + return; + } + + // All connections are good res.status(200).send("Ready"); } catch (error) { log.error("Readiness check failed", error); @@ -91,4 +101,16 @@ export class ReadinessController { return false; } } + + private async checkRedisConnection(): Promise { + try { + // Simple PING command to verify connection is working + const result = await this.redis.ping(); + log.debug("Redis connection check successful", { result }); + return result === "PONG"; + } catch (error) { + log.error("Redis connection check failed", error); + return false; + } + } } diff --git a/memory-bank/activeContext.md b/memory-bank/activeContext.md index e35aaefb63c77e..ae6b25f3eb1d28 100644 --- a/memory-bank/activeContext.md +++ b/memory-bank/activeContext.md @@ -149,11 +149,12 @@ Initial exploration of the Gitpod codebase has revealed: - Manages complex dependencies between components - **Server Health Checks**: The Gitpod server uses two distinct health check mechanisms: - **Liveness Probe**: Checks the event loop lag to determine if the server is functioning properly - - **Readiness Probe**: Checks database and SpiceDB connectivity to ensure the server is ready to handle requests + - **Readiness Probe**: Checks database, SpiceDB, and Redis connectivity to ensure the server is ready to handle requests - Controlled by a ConfigCat feature flag `server_readiness_probe` (default: true) that can bypass the actual checks - **Critical Dependencies**: The server has critical external dependencies that must be operational: - **Database (TypeORM)**: Used for persistent storage - **SpiceDB**: Used for authorization and permission management + - **Redis**: Used for caching, pub/sub messaging, and distributed locking - **Server Architecture Patterns**: - The server uses dependency 
injection (Inversify) for component management - Components are registered in `container-module.ts` and injected where needed diff --git a/memory-bank/progress.md b/memory-bank/progress.md index c5bb9c89104133..d8176fb89e5cd7 100644 --- a/memory-bank/progress.md +++ b/memory-bank/progress.md @@ -212,11 +212,16 @@ No specific blockers or dependencies have been identified yet. This section will - Created PRD document for the readiness probe implementation - Updated Kubernetes deployment configuration to add the readiness probe - Added ConfigCat feature flag `server_readiness_probe` to control readiness checks + - Enhanced server readiness probe with Redis health check: + - Added Redis connectivity check to the ReadinessController + - Updated PRD document to include Redis in the list of checked dependencies + - Ensured the feature flag continues to work with the new Redis check - Updated memory bank with new learnings: - Added information about server health checks and critical dependencies - Documented server architecture patterns and dependency injection - Added information about Kubernetes deployment configuration - Documented feature flag implementation for readiness probe + - Added Redis as a critical dependency for the server - Established standardized development workflows: - Created `workflows.md` to document standardized development processes - Documented the Product Requirements Document (PRD) workflow diff --git a/prd/001-readinessprobe-server.md b/prd/001-readinessprobe-server.md index 8ff423a43b3714..dd8ffdfcdb4bff 100644 --- a/prd/001-readinessprobe-server.md +++ b/prd/001-readinessprobe-server.md @@ -2,7 +2,7 @@ ## Overview -This document outlines the implementation of a readiness probe for the Gitpod server deployment. The readiness probe will ensure that the server is only considered ready when it has established connections to both the database and SpiceDB authorizer. +This document outlines the implementation of a readiness probe for the Gitpod server deployment. The readiness probe will ensure that the server is only considered ready when it has established connections to the database, SpiceDB authorizer, and Redis. ## Background @@ -13,6 +13,7 @@ Currently, the server deployment has a liveness probe that checks the event loop 1. Create a readiness endpoint in the server that checks: - Database connectivity - SpiceDB authorizer connectivity + - Redis connectivity 2. Configure the Kubernetes deployment to use this endpoint as a readiness probe ## Implementation Details @@ -22,7 +23,8 @@ Currently, the server deployment has a liveness probe that checks the event loop We've created a new `ReadinessController` class in `components/server/src/liveness/readiness-controller.ts` that: - Checks database connectivity by executing a simple query - Checks SpiceDB connectivity by attempting to get a client -- Returns a 200 status code only if both checks pass, otherwise returns a 503 status code +- Checks Redis connectivity by executing a PING command +- Returns a 200 status code only if all checks pass, otherwise returns a 503 status code ```typescript // components/server/src/liveness/readiness-controller.ts @@ -147,8 +149,8 @@ ReadinessProbe: &corev1.Probe{ The readiness probe should be tested to ensure: -1. The server is only considered ready when both database and SpiceDB connections are established -2. The server is not considered ready if either connection fails +1. The server is only considered ready when database, SpiceDB, and Redis connections are established +2. 
The server is not considered ready if any connection fails 3. The server becomes ready again when connections are re-established ## Deployment Considerations @@ -161,7 +163,7 @@ The readiness probe should be tested to ensure: The readiness probe implementation includes a ConfigCat feature flag called `server_readiness_probe` that controls whether the actual connectivity checks are performed: -- When the flag is set to `true` (default): The readiness probe will always return a 200 status code, bypassing the actual database and SpiceDB connectivity checks +- When the flag is set to `true` (default): The readiness probe will always return a 200 status code, bypassing the actual database, SpiceDB, and Redis connectivity checks - When the flag is set to `false`: The readiness probe will perform the actual checks and return the appropriate status code based on the results This feature flag provides several benefits: @@ -173,9 +175,10 @@ This feature flag provides several benefits: ## Future Improvements - Add more sophisticated checks for SpiceDB connectivity, such as a simple permission check +- Add more sophisticated checks for Redis connectivity, such as a simple key-value operation - Add metrics for readiness probe failures - Consider adding more dependencies to the readiness check as needed ## Conclusion -This implementation ensures that the server is only considered ready when it has established connections to both the database and SpiceDB authorizer. This improves the reliability of the deployment by preventing traffic from being sent to instances that are not fully initialized. +This implementation ensures that the server is only considered ready when it has established connections to the database, SpiceDB authorizer, and Redis. This improves the reliability of the deployment by preventing traffic from being sent to instances that are not fully initialized. From 5819f53874f251203befb5446590642cd3dcb5eb Mon Sep 17 00:00:00 2001 From: Gero Posmyk-Leinemann Date: Mon, 17 Mar 2025 14:17:08 +0000 Subject: [PATCH 7/9] review comments Tool: gitpod/catfood.gitpod.cloud --- .../pkg/components/server/deployment.go | 2 +- prd/001-readinessprobe-server.md | 184 ------------------ 2 files changed, 1 insertion(+), 185 deletions(-) delete mode 100644 prd/001-readinessprobe-server.md diff --git a/install/installer/pkg/components/server/deployment.go b/install/installer/pkg/components/server/deployment.go index 07dd134c85fc11..c278d7b29013ec 100644 --- a/install/installer/pkg/components/server/deployment.go +++ b/install/installer/pkg/components/server/deployment.go @@ -383,7 +383,7 @@ func deployment(ctx *common.RenderContext) ([]runtime.Object, error) { }, InitialDelaySeconds: 5, PeriodSeconds: 10, - FailureThreshold: 12, // try for 120 seconds, then re-start the container + FailureThreshold: 12, // try for 120 seconds }, SecurityContext: &corev1.SecurityContext{ Privileged: pointer.Bool(false), diff --git a/prd/001-readinessprobe-server.md b/prd/001-readinessprobe-server.md deleted file mode 100644 index dd8ffdfcdb4bff..00000000000000 --- a/prd/001-readinessprobe-server.md +++ /dev/null @@ -1,184 +0,0 @@ -# Server Readiness Probe PRD - -## Overview - -This document outlines the implementation of a readiness probe for the Gitpod server deployment. The readiness probe will ensure that the server is only considered ready when it has established connections to the database, SpiceDB authorizer, and Redis. 
- -## Background - -Currently, the server deployment has a liveness probe that checks the event loop lag, but it does not have a readiness probe. This means that the server is considered ready to receive traffic as soon as the container starts, even if it hasn't established connections to critical dependencies like the database and SpiceDB. - -## Requirements - -1. Create a readiness endpoint in the server that checks: - - Database connectivity - - SpiceDB authorizer connectivity - - Redis connectivity -2. Configure the Kubernetes deployment to use this endpoint as a readiness probe - -## Implementation Details - -### 1. Readiness Controller - -We've created a new `ReadinessController` class in `components/server/src/liveness/readiness-controller.ts` that: -- Checks database connectivity by executing a simple query -- Checks SpiceDB connectivity by attempting to get a client -- Checks Redis connectivity by executing a PING command -- Returns a 200 status code only if all checks pass, otherwise returns a 503 status code - -```typescript -// components/server/src/liveness/readiness-controller.ts -import { injectable, inject } from "inversify"; -import express from "express"; -import { TypeORM } from "@gitpod/gitpod-db/lib"; -import { SpiceDBClientProvider } from "../authorization/spicedb"; -import { log } from "@gitpod/gitpod-protocol/lib/util/logging"; - -@injectable() -export class ReadinessController { - @inject(TypeORM) protected readonly typeOrm: TypeORM; - @inject(SpiceDBClientProvider) protected readonly spiceDBClientProvider: SpiceDBClientProvider; - - get apiRouter(): express.Router { - const router = express.Router(); - this.addReadinessHandler(router); - return router; - } - - protected addReadinessHandler(router: express.Router) { - router.get("/", async (_, res) => { - try { - // Check database connection - const dbConnection = await this.checkDatabaseConnection(); - if (!dbConnection) { - log.warn("Readiness check failed: Database connection failed"); - res.status(503).send("Database connection failed"); - return; - } - - // Check SpiceDB connection - const spiceDBConnection = await this.checkSpiceDBConnection(); - if (!spiceDBConnection) { - log.warn("Readiness check failed: SpiceDB connection failed"); - res.status(503).send("SpiceDB connection failed"); - return; - } - - // Both connections are good - res.status(200).send("Ready"); - } catch (error) { - log.error("Readiness check failed", error); - res.status(503).send("Readiness check failed"); - } - }); - } - - private async checkDatabaseConnection(): Promise { - try { - const connection = await this.typeOrm.getConnection(); - // Simple query to verify connection is working - await connection.query("SELECT 1"); - return true; - } catch (error) { - log.error("Database connection check failed", error); - return false; - } - } - - private async checkSpiceDBConnection(): Promise { - try { - // Just getting the client without error is a basic check - // If the client is not available, getClient() will throw an error - this.spiceDBClientProvider.getClient(); - return true; - } catch (error) { - log.error("SpiceDB connection check failed", error); - return false; - } - } -} -``` - -### 2. 
Server Configuration - -We've updated the server's container module to include the `ReadinessController`: - -```typescript -// In container-module.ts -import { ReadinessController } from "./liveness/readiness-controller"; - -// In the productionContainerModule -bind(ReadinessController).toSelf().inSingletonScope(); -``` - -We've also updated the server's route configuration to register the readiness endpoint: - -```typescript -// In server.ts -import { ReadinessController } from "./liveness/readiness-controller"; - -// In the constructor -@inject(ReadinessController) private readonly readinessController: ReadinessController, - -// In registerRoutes method -app.use("/ready", this.readinessController.apiRouter); -``` - -### 3. Kubernetes Deployment - -We need to update the server deployment in `install/installer/pkg/components/server/deployment.go` to add a readiness probe: - -```go -ReadinessProbe: &corev1.Probe{ - ProbeHandler: corev1.ProbeHandler{ - HTTPGet: &corev1.HTTPGetAction{ - Path: "/ready", - Port: intstr.IntOrString{ - Type: intstr.Int, - IntVal: ContainerPort, - }, - }, - }, - InitialDelaySeconds: 30, - PeriodSeconds: 10, - FailureThreshold: 3, -}, -``` - -## Testing - -The readiness probe should be tested to ensure: - -1. The server is only considered ready when database, SpiceDB, and Redis connections are established -2. The server is not considered ready if any connection fails -3. The server becomes ready again when connections are re-established - -## Deployment Considerations - -- The readiness probe has an initial delay of 30 seconds to allow the server time to establish connections -- The probe runs every 10 seconds -- The probe allows up to 3 failures before marking the pod as not ready - -## Feature Flag Control - -The readiness probe implementation includes a ConfigCat feature flag called `server_readiness_probe` that controls whether the actual connectivity checks are performed: - -- When the flag is set to `true` (default): The readiness probe will always return a 200 status code, bypassing the actual database, SpiceDB, and Redis connectivity checks -- When the flag is set to `false`: The readiness probe will perform the actual checks and return the appropriate status code based on the results - -This feature flag provides several benefits: - -1. **Gradual Rollout**: Allows for a gradual rollout of the readiness probe, which is useful for testing in production environments -2. **Emergency Override**: If the readiness probe causes issues in production, the flag can be quickly toggled to bypass the checks without requiring a code deployment -3. **Environment-Specific Configuration**: Different environments (dev, staging, production) can have different settings for the readiness probe - -## Future Improvements - -- Add more sophisticated checks for SpiceDB connectivity, such as a simple permission check -- Add more sophisticated checks for Redis connectivity, such as a simple key-value operation -- Add metrics for readiness probe failures -- Consider adding more dependencies to the readiness check as needed - -## Conclusion - -This implementation ensures that the server is only considered ready when it has established connections to the database, SpiceDB authorizer, and Redis. This improves the reliability of the deployment by preventing traffic from being sent to instances that are not fully initialized. 
From 6c8b811838d6881194bae8abb92d7e3876005be4 Mon Sep 17 00:00:00 2001 From: Gero Posmyk-Leinemann Date: Mon, 17 Mar 2025 14:25:06 +0000 Subject: [PATCH 8/9] [dev] Remove outdated gopls config Tool: gitpod/catfood.gitpod.cloud --- components/content-service/content-service.code-workspace | 1 - components/image-builder-mk3/image-builder.code-workspace | 1 - components/ws-manager-mk2/ws-manager-mk2.code-workspace | 1 - gitpod-ws.code-workspace | 1 - 4 files changed, 4 deletions(-) diff --git a/components/content-service/content-service.code-workspace b/components/content-service/content-service.code-workspace index 5e2ae90518651e..1f8200dfa66d87 100644 --- a/components/content-service/content-service.code-workspace +++ b/components/content-service/content-service.code-workspace @@ -43,7 +43,6 @@ }, "go.lintTool": "golangci-lint", "gopls": { - "allowModfileModifications": true } } } diff --git a/components/image-builder-mk3/image-builder.code-workspace b/components/image-builder-mk3/image-builder.code-workspace index 6915b9ac74bc2c..4451098c282779 100644 --- a/components/image-builder-mk3/image-builder.code-workspace +++ b/components/image-builder-mk3/image-builder.code-workspace @@ -45,7 +45,6 @@ }, "go.lintTool": "golangci-lint", "gopls": { - "allowModfileModifications": true } } } diff --git a/components/ws-manager-mk2/ws-manager-mk2.code-workspace b/components/ws-manager-mk2/ws-manager-mk2.code-workspace index b1edf6ddc7a894..0b73e8d6ad487f 100644 --- a/components/ws-manager-mk2/ws-manager-mk2.code-workspace +++ b/components/ws-manager-mk2/ws-manager-mk2.code-workspace @@ -46,7 +46,6 @@ }, "go.lintTool": "golangci-lint", "gopls": { - "allowModfileModifications": true } } } diff --git a/gitpod-ws.code-workspace b/gitpod-ws.code-workspace index 9cb0715180f369..c119e4269bcd8a 100644 --- a/gitpod-ws.code-workspace +++ b/gitpod-ws.code-workspace @@ -162,7 +162,6 @@ "go.lintTool": "golangci-lint", "go.lintFlags": ["-disable", "govet,errcheck,staticcheck", "--allow-parallel-runners", "--timeout", "15m"], "gopls": { - "allowModfileModifications": true }, "prettier.configPath": "/workspace/gitpod/.prettierrc.json" } From 6437e49c4c7f4024c4c063ec9b87729eef5abc0f Mon Sep 17 00:00:00 2001 From: Gero Posmyk-Leinemann Date: Mon, 17 Mar 2025 14:43:16 +0000 Subject: [PATCH 9/9] [server] Fix import Tool: gitpod/catfood.gitpod.cloud --- components/server/src/liveness/readiness-controller.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/server/src/liveness/readiness-controller.ts b/components/server/src/liveness/readiness-controller.ts index f752625d0d4ceb..59c4c354ba6157 100644 --- a/components/server/src/liveness/readiness-controller.ts +++ b/components/server/src/liveness/readiness-controller.ts @@ -9,7 +9,7 @@ import express from "express"; import { TypeORM } from "@gitpod/gitpod-db/lib"; import { SpiceDBClientProvider } from "../authorization/spicedb"; import { log } from "@gitpod/gitpod-protocol/lib/util/logging"; -import { ReadSchemaRequest } from "@authzed/authzed-node/dist/src/v1"; +import { v1 } from "@authzed/authzed-node"; import { getExperimentsClientForBackend } from "@gitpod/gitpod-protocol/lib/experiments/configcat-server"; import { Redis } from "ioredis"; @@ -91,7 +91,7 @@ export class ReadinessController { const client = this.spiceDBClientProvider.getClient(); // Send a request, to verify that the connection works - const req = ReadSchemaRequest.create({}); + const req = v1.ReadSchemaRequest.create({}); const response = await client.readSchema(req); 
log.debug("SpiceDB connection check successful", { schemaLength: response.schemaText.length });