diff --git a/components/content-service/content-service.code-workspace b/components/content-service/content-service.code-workspace index 5e2ae90518651e..1f8200dfa66d87 100644 --- a/components/content-service/content-service.code-workspace +++ b/components/content-service/content-service.code-workspace @@ -43,7 +43,6 @@ }, "go.lintTool": "golangci-lint", "gopls": { - "allowModfileModifications": true } } } diff --git a/components/image-builder-mk3/image-builder.code-workspace b/components/image-builder-mk3/image-builder.code-workspace index 6915b9ac74bc2c..4451098c282779 100644 --- a/components/image-builder-mk3/image-builder.code-workspace +++ b/components/image-builder-mk3/image-builder.code-workspace @@ -45,7 +45,6 @@ }, "go.lintTool": "golangci-lint", "gopls": { - "allowModfileModifications": true } } } diff --git a/components/server/src/container-module.ts b/components/server/src/container-module.ts index 58d09ef485b4c5..0a1af2352bc012 100644 --- a/components/server/src/container-module.ts +++ b/components/server/src/container-module.ts @@ -71,6 +71,7 @@ import { WebhookEventGarbageCollector } from "./jobs/webhook-gc"; import { WorkspaceGarbageCollector } from "./jobs/workspace-gc"; import { LinkedInService } from "./linkedin-service"; import { LivenessController } from "./liveness/liveness-controller"; +import { ReadinessController } from "./liveness/readiness-controller"; import { RedisSubscriber } from "./messaging/redis-subscriber"; import { MonitoringEndpointsApp } from "./monitoring-endpoints"; import { OAuthController } from "./oauth-server/oauth-controller"; @@ -135,6 +136,7 @@ import { AnalyticsController } from "./analytics-controller"; import { InstallationAdminCleanup } from "./jobs/installation-admin-cleanup"; import { AuditLogService } from "./audit/AuditLogService"; import { AuditLogGarbageCollectorJob } from "./jobs/auditlog-gc"; +import { ProbesApp } from "./liveness/probes"; export const productionContainerModule = new ContainerModule( (bind, unbind, isBound, rebind, unbindAsync, onActivation, onDeactivation) => { @@ -240,7 +242,10 @@ export const productionContainerModule = new ContainerModule( bind(IWorkspaceManagerClientCallMetrics).toService(IClientCallMetrics); bind(WorkspaceDownloadService).toSelf().inSingletonScope(); + + bind(ProbesApp).toSelf().inSingletonScope(); bind(LivenessController).toSelf().inSingletonScope(); + bind(ReadinessController).toSelf().inSingletonScope(); bind(OneTimeSecretServer).toSelf().inSingletonScope(); diff --git a/components/server/src/liveness/probes.ts b/components/server/src/liveness/probes.ts new file mode 100644 index 00000000000000..160c02de5af4c3 --- /dev/null +++ b/components/server/src/liveness/probes.ts @@ -0,0 +1,25 @@ +/** + * Copyright (c) 2025 Gitpod GmbH. All rights reserved. + * Licensed under the GNU Affero General Public License (AGPL). + * See License.AGPL.txt in the project root for license information. + */ + +import express from "express"; +import { inject, injectable } from "inversify"; +import { LivenessController } from "./liveness-controller"; +import { ReadinessController } from "./readiness-controller"; + +@injectable() +export class ProbesApp { + constructor( + @inject(LivenessController) protected readonly livenessController: LivenessController, + @inject(ReadinessController) protected readonly readinessController: ReadinessController, + ) {} + + public create(): express.Application { + const probesApp = express(); + probesApp.use("/live", this.livenessController.apiRouter); + probesApp.use("/ready", this.readinessController.apiRouter); + return probesApp; + } +} diff --git a/components/server/src/liveness/readiness-controller.ts b/components/server/src/liveness/readiness-controller.ts new file mode 100644 index 00000000000000..59c4c354ba6157 --- /dev/null +++ b/components/server/src/liveness/readiness-controller.ts @@ -0,0 +1,116 @@ +/** + * Copyright (c) 2025 Gitpod GmbH. All rights reserved. + * Licensed under the GNU Affero General Public License (AGPL). + * See License.AGPL.txt in the project root for license information. + */ + +import { injectable, inject } from "inversify"; +import express from "express"; +import { TypeORM } from "@gitpod/gitpod-db/lib"; +import { SpiceDBClientProvider } from "../authorization/spicedb"; +import { log } from "@gitpod/gitpod-protocol/lib/util/logging"; +import { v1 } from "@authzed/authzed-node"; +import { getExperimentsClientForBackend } from "@gitpod/gitpod-protocol/lib/experiments/configcat-server"; +import { Redis } from "ioredis"; + +@injectable() +export class ReadinessController { + @inject(TypeORM) protected readonly typeOrm: TypeORM; + @inject(SpiceDBClientProvider) protected readonly spiceDBClientProvider: SpiceDBClientProvider; + @inject(Redis) protected readonly redis: Redis; + + get apiRouter(): express.Router { + const router = express.Router(); + this.addReadinessHandler(router); + return router; + } + + protected addReadinessHandler(router: express.Router) { + router.get("/", async (_, res) => { + try { + // Check feature flag first + const readinessProbeEnabled = await getExperimentsClientForBackend().getValueAsync( + "server_readiness_probe", + true, // Default to readiness probe, skip if false + {}, + ); + + if (!readinessProbeEnabled) { + log.debug("Readiness check skipped due to feature flag"); + res.status(200); + return; + } + + // Check database connection + const dbConnection = await this.checkDatabaseConnection(); + if (!dbConnection) { + log.warn("Readiness check failed: Database connection failed"); + res.status(503).send("Database connection failed"); + return; + } + + // Check SpiceDB connection + const spiceDBConnection = await this.checkSpiceDBConnection(); + if (!spiceDBConnection) { + log.warn("Readiness check failed: SpiceDB connection failed"); + res.status(503).send("SpiceDB connection failed"); + return; + } + + // Check Redis connection + const redisConnection = await this.checkRedisConnection(); + if (!redisConnection) { + log.warn("Readiness check failed: Redis connection failed"); + res.status(503).send("Redis connection failed"); + return; + } + + // All connections are good + res.status(200).send("Ready"); + } catch (error) { + log.error("Readiness check failed", error); + res.status(503).send("Readiness check failed"); + } + }); + } + + private async checkDatabaseConnection(): Promise { + try { + const connection = await this.typeOrm.getConnection(); + // Simple query to verify connection is working + await connection.query("SELECT 1"); + return true; + } catch (error) { + log.error("Database connection check failed", error); + return false; + } + } + + private async checkSpiceDBConnection(): Promise { + try { + const client = this.spiceDBClientProvider.getClient(); + + // Send a request, to verify that the connection works + const req = v1.ReadSchemaRequest.create({}); + const response = await client.readSchema(req); + log.debug("SpiceDB connection check successful", { schemaLength: response.schemaText.length }); + + return true; + } catch (error) { + log.error("SpiceDB connection check failed", error); + return false; + } + } + + private async checkRedisConnection(): Promise { + try { + // Simple PING command to verify connection is working + const result = await this.redis.ping(); + log.debug("Redis connection check successful", { result }); + return result === "PONG"; + } catch (error) { + log.error("Redis connection check failed", error); + return false; + } + } +} diff --git a/components/server/src/server.ts b/components/server/src/server.ts index 57facbb12560ab..8371e6111c1a6e 100644 --- a/components/server/src/server.ts +++ b/components/server/src/server.ts @@ -36,7 +36,6 @@ import { NewsletterSubscriptionController } from "./user/newsletter-subscription import { Config } from "./config"; import { DebugApp } from "@gitpod/gitpod-protocol/lib/util/debug-app"; import { WsConnectionHandler } from "./express/ws-connection-handler"; -import { LivenessController } from "./liveness/liveness-controller"; import { IamSessionApp } from "./iam/iam-session-app"; import { API } from "./api/server"; import { GithubApp } from "./prebuilds/github-app"; @@ -53,6 +52,11 @@ import { } from "./workspace/headless-log-service"; import { runWithRequestContext } from "./util/request-context"; import { AnalyticsController } from "./analytics-controller"; +import { ProbesApp as ProbesAppProvider } from "./liveness/probes"; + +const MONITORING_PORT = 9500; +const IAM_SESSION_PORT = 9876; +const PROBES_PORT = 9400; @injectable() export class Server { @@ -65,6 +69,8 @@ export class Server { protected privateApiServer?: http.Server; protected readonly eventEmitter = new EventEmitter(); + protected probesApp: express.Application; + protected probesServer?: http.Server; protected app?: express.Application; protected httpServer?: http.Server; protected monitoringApp?: express.Application; @@ -79,7 +85,6 @@ export class Server { @inject(UserController) private readonly userController: UserController, @inject(WebsocketConnectionManager) private readonly websocketConnectionHandler: WebsocketConnectionManager, @inject(WorkspaceDownloadService) private readonly workspaceDownloadService: WorkspaceDownloadService, - @inject(LivenessController) private readonly livenessController: LivenessController, @inject(MonitoringEndpointsApp) private readonly monitoringEndpointsApp: MonitoringEndpointsApp, @inject(CodeSyncService) private readonly codeSyncService: CodeSyncService, @inject(HeadlessLogController) private readonly headlessLogController: HeadlessLogController, @@ -100,6 +105,7 @@ export class Server { @inject(API) private readonly api: API, @inject(RedisSubscriber) private readonly redisSubscriber: RedisSubscriber, @inject(AnalyticsController) private readonly analyticsController: AnalyticsController, + @inject(ProbesAppProvider) private readonly probesAppProvider: ProbesAppProvider, ) {} public async init(app: express.Application) { @@ -113,6 +119,9 @@ export class Server { await this.typeOrm.connect(); log.info("connected to DB"); + // probes + this.probesApp = this.probesAppProvider.create(); + // metrics app.use((req: express.Request, res: express.Response, next: express.NextFunction) => { const startTime = Date.now(); @@ -319,7 +328,6 @@ export class Server { // Authorization: none app.use(this.oneTimeSecretServer.apiRouter); app.use(this.newsletterSubscriptionController.apiRouter); - app.use("/live", this.livenessController.apiRouter); app.use("/version", (req: express.Request, res: express.Response, next: express.NextFunction) => { res.send(this.config.version); }); @@ -351,6 +359,11 @@ export class Server { throw new Error("server cannot start, not initialized"); } + const probeServer = this.probesApp.listen(PROBES_PORT, () => { + log.info(`probes server listening on port: ${(probeServer.address()).port}`); + }); + this.probesServer = probeServer; + const httpServer = this.app.listen(port, () => { this.eventEmitter.emit(Server.EVENT_ON_START, httpServer); log.info(`server listening on port: ${(httpServer.address()).port}`); @@ -358,7 +371,7 @@ export class Server { this.httpServer = httpServer; if (this.monitoringApp) { - this.monitoringHttpServer = this.monitoringApp.listen(9500, "localhost", () => { + this.monitoringHttpServer = this.monitoringApp.listen(MONITORING_PORT, "localhost", () => { log.info( `monitoring app listening on port: ${(this.monitoringHttpServer!.address()).port}`, ); @@ -366,7 +379,7 @@ export class Server { } if (this.iamSessionApp) { - this.iamSessionAppServer = this.iamSessionApp.listen(9876, () => { + this.iamSessionAppServer = this.iamSessionApp.listen(IAM_SESSION_PORT, () => { log.info( `IAM session server listening on port: ${(this.iamSessionAppServer!.address()).port}`, ); @@ -406,6 +419,7 @@ export class Server { race(this.stopServer(this.httpServer), "stop httpserver"), race(this.stopServer(this.privateApiServer), "stop private api server"), race(this.stopServer(this.publicApiServer), "stop public api server"), + race(this.stopServer(this.probesServer), "stop probe server"), race((async () => this.disposables.dispose())(), "dispose disposables"), ]); diff --git a/components/ws-manager-mk2/ws-manager-mk2.code-workspace b/components/ws-manager-mk2/ws-manager-mk2.code-workspace index b1edf6ddc7a894..0b73e8d6ad487f 100644 --- a/components/ws-manager-mk2/ws-manager-mk2.code-workspace +++ b/components/ws-manager-mk2/ws-manager-mk2.code-workspace @@ -46,7 +46,6 @@ }, "go.lintTool": "golangci-lint", "gopls": { - "allowModfileModifications": true } } } diff --git a/gitpod-ws.code-workspace b/gitpod-ws.code-workspace index 9cb0715180f369..c119e4269bcd8a 100644 --- a/gitpod-ws.code-workspace +++ b/gitpod-ws.code-workspace @@ -162,7 +162,6 @@ "go.lintTool": "golangci-lint", "go.lintFlags": ["-disable", "govet,errcheck,staticcheck", "--allow-parallel-runners", "--timeout", "15m"], "gopls": { - "allowModfileModifications": true }, "prettier.configPath": "/workspace/gitpod/.prettierrc.json" } diff --git a/install/installer/pkg/components/server/constants.go b/install/installer/pkg/components/server/constants.go index 8e024ada3ad2f9..32e4ffd740ce5a 100644 --- a/install/installer/pkg/components/server/constants.go +++ b/install/installer/pkg/components/server/constants.go @@ -25,6 +25,8 @@ const ( DebugNodePortName = "debugnode" ServicePort = 3000 personalAccessTokenSigningKeyMountPath = "/secrets/personal-access-token-signing-key" + ProbesPort = 9400 + ProbesPortName = "probes" AdminCredentialsSecretName = "admin-credentials" AdminCredentialsSecretMountPath = "/credentials/admin" diff --git a/install/installer/pkg/components/server/deployment.go b/install/installer/pkg/components/server/deployment.go index ad973c6143ed00..c278d7b29013ec 100644 --- a/install/installer/pkg/components/server/deployment.go +++ b/install/installer/pkg/components/server/deployment.go @@ -363,7 +363,7 @@ func deployment(ctx *common.RenderContext) ([]runtime.Object, error) { Path: "/live", Port: intstr.IntOrString{ Type: intstr.Int, - IntVal: ContainerPort, + IntVal: ProbesPort, }, }, }, @@ -371,35 +371,61 @@ func deployment(ctx *common.RenderContext) ([]runtime.Object, error) { PeriodSeconds: 10, FailureThreshold: 6, }, + ReadinessProbe: &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/ready", + Port: intstr.IntOrString{ + Type: intstr.Int, + IntVal: ProbesPort, + }, + }, + }, + InitialDelaySeconds: 5, + PeriodSeconds: 10, + FailureThreshold: 12, // try for 120 seconds + }, SecurityContext: &corev1.SecurityContext{ Privileged: pointer.Bool(false), AllowPrivilegeEscalation: pointer.Bool(false), }, - Ports: []corev1.ContainerPort{{ - Name: ContainerPortName, - ContainerPort: ContainerPort, - }, { - Name: baseserver.BuiltinMetricsPortName, - ContainerPort: baseserver.BuiltinMetricsPort, - }, { - Name: InstallationAdminName, - ContainerPort: InstallationAdminPort, - }, { - Name: IAMSessionPortName, - ContainerPort: IAMSessionPort, - }, { - Name: DebugPortName, - ContainerPort: baseserver.BuiltinDebugPort, - }, { - Name: DebugNodePortName, - ContainerPort: common.DebugNodePort, - }, { - Name: GRPCAPIName, - ContainerPort: GRPCAPIPort, - }, { - Name: PublicAPIName, - ContainerPort: PublicAPIPort, - }, + Ports: []corev1.ContainerPort{ + { + Name: ContainerPortName, + ContainerPort: ContainerPort, + }, + { + Name: baseserver.BuiltinMetricsPortName, + ContainerPort: baseserver.BuiltinMetricsPort, + }, + { + Name: InstallationAdminName, + ContainerPort: InstallationAdminPort, + }, + { + Name: IAMSessionPortName, + ContainerPort: IAMSessionPort, + }, + { + Name: DebugPortName, + ContainerPort: baseserver.BuiltinDebugPort, + }, + { + Name: DebugNodePortName, + ContainerPort: common.DebugNodePort, + }, + { + Name: GRPCAPIName, + ContainerPort: GRPCAPIPort, + }, + { + Name: PublicAPIName, + ContainerPort: PublicAPIPort, + }, + { + Name: ProbesPortName, + ContainerPort: ProbesPort, + }, }, // todo(sje): do we need to cater for serverContainer.env from values.yaml? Env: common.CustomizeEnvvar(ctx, Component, env), diff --git a/memory-bank/.clinerules b/memory-bank/.clinerules index 599a0c9ec29b52..b4c1a7b5c25a01 100644 --- a/memory-bank/.clinerules +++ b/memory-bank/.clinerules @@ -26,6 +26,15 @@ This file captures important patterns, preferences, and project intelligence tha ## Development Workflow +- **Feature Development Process**: + - Follow the Product Requirements Document (PRD) workflow documented in systemPatterns.md under "Development Workflows" + - Create PRD documents in the `prd/` directory with naming convention `NNN-featurename.md` + - Include standard sections: Overview, Background, Requirements, Implementation Details, Testing, Deployment Considerations, Future Improvements + - Use Plan Mode for requirements gathering and planning + - Use Act Mode for implementation and documentation updates + - Always update memory bank with new knowledge gained during implementation + - Reference the PRD in commit messages and documentation + - **Build Approaches**: - **In-tree builds** (primary for local development): - TypeScript components: Use `yarn` commands defined in package.json diff --git a/memory-bank/activeContext.md b/memory-bank/activeContext.md index cc91a0431e36a3..ae6b25f3eb1d28 100644 --- a/memory-bank/activeContext.md +++ b/memory-bank/activeContext.md @@ -10,10 +10,13 @@ Key areas of focus include: 2. **Component Documentation**: Creating detailed documentation for each component 3. **Development Workflow**: Understanding how to effectively develop and test changes 4. **Documentation**: Maintaining a comprehensive memory bank for future reference +5. **Standardized Workflows**: Establishing and documenting standardized workflows for development activities ## Recent Changes - Created the initial memory bank structure with core files +- Established standardized workflows for development activities: + - Product Requirements Document (PRD) workflow for feature development - Added a components subdirectory to the memory bank - Created detailed documentation for key components: - blobserve: Service that provides static assets from OCI images @@ -144,5 +147,23 @@ Initial exploration of the Gitpod codebase has revealed: - Works by copying relevant sources into a separate file tree - Can also be run from inside the workspace - Manages complex dependencies between components +- **Server Health Checks**: The Gitpod server uses two distinct health check mechanisms: + - **Liveness Probe**: Checks the event loop lag to determine if the server is functioning properly + - **Readiness Probe**: Checks database, SpiceDB, and Redis connectivity to ensure the server is ready to handle requests + - Controlled by a ConfigCat feature flag `server_readiness_probe` (default: true) that can bypass the actual checks +- **Critical Dependencies**: The server has critical external dependencies that must be operational: + - **Database (TypeORM)**: Used for persistent storage + - **SpiceDB**: Used for authorization and permission management + - **Redis**: Used for caching, pub/sub messaging, and distributed locking +- **Server Architecture Patterns**: + - The server uses dependency injection (Inversify) for component management + - Components are registered in `container-module.ts` and injected where needed + - The server exposes HTTP endpoints for health checks and other functionality + - Routes are registered in the `registerRoutes` method in `server.ts` + - New functionality typically requires: + 1. Creating a new controller/service class + 2. Registering it in the container module + 3. Injecting it where needed + 4. Updating any relevant configuration files This section will be continuously updated as new insights are gained through working with the system. diff --git a/memory-bank/progress.md b/memory-bank/progress.md index 266640c698f8fc..d8176fb89e5cd7 100644 --- a/memory-bank/progress.md +++ b/memory-bank/progress.md @@ -111,6 +111,7 @@ As we begin working with the codebase, we have not yet identified specific issue - Documentation of second set of API components (image-builder-api, local-app-api, registry-facade-api) - Documentation of third set of API components (supervisor-api, usage-api, ws-daemon-api) - Documentation of fourth set of API components (ws-manager-api, ws-manager-bridge-api) +- First feature implementation: Server readiness probe for database and SpiceDB connectivity ### Upcoming Milestones - Development environment setup @@ -204,6 +205,28 @@ No specific blockers or dependencies have been identified yet. This section will - ws-manager-api: Interfaces for managing the lifecycle of workspaces in Kubernetes clusters - ws-manager-bridge-api: Interfaces for dynamic management of workspace clusters +- **3/17/2025**: + - Implemented server readiness probe: + - Created ReadinessController to check database and SpiceDB connectivity + - Updated server container module and route configuration + - Created PRD document for the readiness probe implementation + - Updated Kubernetes deployment configuration to add the readiness probe + - Added ConfigCat feature flag `server_readiness_probe` to control readiness checks + - Enhanced server readiness probe with Redis health check: + - Added Redis connectivity check to the ReadinessController + - Updated PRD document to include Redis in the list of checked dependencies + - Ensured the feature flag continues to work with the new Redis check + - Updated memory bank with new learnings: + - Added information about server health checks and critical dependencies + - Documented server architecture patterns and dependency injection + - Added information about Kubernetes deployment configuration + - Documented feature flag implementation for readiness probe + - Added Redis as a critical dependency for the server + - Established standardized development workflows: + - Created `workflows.md` to document standardized development processes + - Documented the Product Requirements Document (PRD) workflow + - Updated `activeContext.md` to reference the new workflows + ## Next Evaluation Point The next evaluation of progress will occur after: diff --git a/memory-bank/systemPatterns.md b/memory-bank/systemPatterns.md index 7fe8b7263c5933..fc23decbcf0192 100644 --- a/memory-bank/systemPatterns.md +++ b/memory-bank/systemPatterns.md @@ -129,3 +129,19 @@ Workspaces are treated as immutable, with changes to configuration resulting in 6. **gRPC Communication**: Internal services communicate using gRPC for efficient, typed communication. 7. **Leeway Build System**: Custom build system for managing the complex dependencies between components. + +8. **Kubernetes Deployment Configuration**: All code that defines Kubernetes objects for deployable components lives in `install/installer`. This centralized approach ensures consistent deployment patterns across all components. + +## Development Workflows + +### Product Requirements Document (PRD) Workflow +Gitpod uses a structured PRD workflow for feature development to ensure proper planning, documentation, and implementation: + +1. **Requirements Gathering** (Plan Mode): Understand the problem, explore existing components, gather information +2. **PRD Creation** (Plan Mode): Create a detailed document in `prd/` with standardized sections +3. **Implementation Planning** (Plan Mode): Identify files to modify and plan the approach +4. **Implementation** (Act Mode): Create/modify necessary files following the plan +5. **Documentation Update** (Act Mode): Update memory bank with new knowledge +6. **Verification**: Ensure implementation meets requirements and documentation is complete + +This workflow ensures thorough planning, clear documentation, and knowledge preservation for all feature development.