diff --git a/apps/api/src/billing.ts b/apps/api/src/billing.ts
index 3bbafbbc4b..eb86c11b27 100644
--- a/apps/api/src/billing.ts
+++ b/apps/api/src/billing.ts
@@ -1,8 +1,8 @@
 // https://github.com/t3dotgg/stripe-recommendations/blob/main/README.md
 import Stripe from "stripe";
 
-import { stripe } from "./stripe";
-import { supabaseAdmin } from "./supabase";
+import { stripe } from "./integration/stripe";
+import { supabaseAdmin } from "./integration/supabase";
 
 const HANDLED_EVENTS: Stripe.Event.Type[] = [
   "checkout.session.completed",
diff --git a/apps/api/src/hono-bindings.ts b/apps/api/src/hono-bindings.ts
index e0c7a009d5..c316410b09 100644
--- a/apps/api/src/hono-bindings.ts
+++ b/apps/api/src/hono-bindings.ts
@@ -1,7 +1,9 @@
+import type * as Sentry from "@sentry/bun";
 import type Stripe from "stripe";
 
 export type AppBindings = {
   Variables: {
     stripeEvent: Stripe.Event;
+    sentrySpan: Sentry.Span;
   };
 };
diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts
index ff3c9a4a54..f4f29c398b 100644
--- a/apps/api/src/index.ts
+++ b/apps/api/src/index.ts
@@ -11,11 +11,13 @@ import { logger } from "hono/logger";
 
 import { env } from "./env";
 import type { AppBindings } from "./hono-bindings";
-import { loadTestOverride } from "./load-test-auth";
+import {
+  loadTestOverride,
+  sentryMiddleware,
+  supabaseAuthMiddleware,
+  verifyStripeWebhook,
+} from "./middleware";
 import { API_TAGS, routes } from "./routes";
-import { sentryMiddleware } from "./sentry/middleware";
-import { verifyStripeWebhook } from "./stripe";
-import { requireSupabaseAuth } from "./supabase";
 
 const app = new Hono<AppBindings>();
 
@@ -44,12 +46,12 @@ app.use("*", (c, next) => {
   return corsMiddleware(c, next);
 });
 
-app.use("/chat/completions", loadTestOverride, requireSupabaseAuth);
+app.use("/chat/completions", loadTestOverride, supabaseAuthMiddleware);
 app.use("/webhook/stripe", verifyStripeWebhook);
 
 if (env.NODE_ENV !== "development") {
-  app.use("/listen", loadTestOverride, requireSupabaseAuth);
-  app.use("/transcribe", loadTestOverride, requireSupabaseAuth);
+  app.use("/listen", loadTestOverride, supabaseAuthMiddleware);
+  app.use("/transcribe", loadTestOverride, supabaseAuthMiddleware);
 }
 
 app.route("/", routes);
diff --git a/apps/api/src/integration/index.ts b/apps/api/src/integration/index.ts
new file mode 100644
index 0000000000..f22e73bee8
--- /dev/null
+++ b/apps/api/src/integration/index.ts
@@ -0,0 +1,4 @@
+export * from "./supabase";
+export * from "./stripe";
+export * from "./openrouter";
+export * from "./posthog";
diff --git a/apps/api/src/integration/openrouter.ts b/apps/api/src/integration/openrouter.ts
new file mode 100644
index 0000000000..97b3f36f5e
--- /dev/null
+++ b/apps/api/src/integration/openrouter.ts
@@ -0,0 +1,23 @@
+import { OpenAI as PostHogOpenAI } from "@posthog/ai";
+
+import { env } from "../env";
+import { posthog } from "./posthog";
+
+export const openai = new PostHogOpenAI({
+  baseURL: "https://openrouter.ai/api/v1",
+  apiKey: env.OPENROUTER_API_KEY,
+  posthog,
+});
+
+const MODELS = {
+  toolCalling: [
+    "moonshotai/kimi-k2-0905:exacto",
+    "anthropic/claude-haiku-4.5",
+    "openai/gpt-oss-120b:exacto",
+  ],
+  default: ["moonshotai/kimi-k2-0905", "openai/gpt-5.1-chat"],
+} as const;
+
+export function getModels(needsToolCalling: boolean): string[] {
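
A minimal sketch, outside the patch itself, of how the extracted `getModels` helper could be pinned down with Bun's test runner; the relative import path is an assumption based on the layout above:

```ts
import { expect, test } from "bun:test";

import { getModels } from "../src/integration/openrouter";

test("tool-calling requests get the exacto fallback chain", () => {
  expect(getModels(true)).toEqual([
    "moonshotai/kimi-k2-0905:exacto",
    "anthropic/claude-haiku-4.5",
    "openai/gpt-oss-120b:exacto",
  ]);
});

test("plain chat requests get the default chain", () => {
  expect(getModels(false)).toEqual([
    "moonshotai/kimi-k2-0905",
    "openai/gpt-5.1-chat",
  ]);
});
```
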
+  return needsToolCalling ? [...MODELS.toolCalling] : [...MODELS.default];
+}
diff --git a/apps/api/src/posthog.ts b/apps/api/src/integration/posthog.ts
similarity index 84%
rename from apps/api/src/posthog.ts
rename to apps/api/src/integration/posthog.ts
index c1d1712202..30a63e03d9 100644
--- a/apps/api/src/posthog.ts
+++ b/apps/api/src/integration/posthog.ts
@@ -1,6 +1,6 @@
 import { PostHog } from "posthog-node";
 
-import { env } from "./env";
+import { env } from "../env";
 
 export const posthog = new PostHog(env.POSTHOG_API_KEY, {
   host: "https://us.i.posthog.com",
diff --git a/apps/api/src/integration/stripe.ts b/apps/api/src/integration/stripe.ts
new file mode 100644
index 0000000000..a3157276a9
--- /dev/null
+++ b/apps/api/src/integration/stripe.ts
@@ -0,0 +1,7 @@
+import Stripe from "stripe";
+
+import { env } from "../env";
+
+export const stripe = new Stripe(env.STRIPE_SECRET_KEY, {
+  apiVersion: "2025-10-29.clover",
+});
diff --git a/apps/api/src/integration/supabase.ts b/apps/api/src/integration/supabase.ts
new file mode 100644
index 0000000000..4d2b63672f
--- /dev/null
+++ b/apps/api/src/integration/supabase.ts
@@ -0,0 +1,8 @@
+import { createClient } from "@supabase/supabase-js";
+
+import { env } from "../env";
+
+export const supabaseAdmin = createClient(
+  env.SUPABASE_URL,
+  env.SUPABASE_SERVICE_ROLE_KEY,
+);
diff --git a/apps/api/src/listen.ts b/apps/api/src/listen.ts
index e9a85cfd2d..1494891cb3 100644
--- a/apps/api/src/listen.ts
+++ b/apps/api/src/listen.ts
@@ -2,7 +2,7 @@ import * as Sentry from "@sentry/bun";
 import type { Handler } from "hono";
 import { upgradeWebSocket } from "hono/bun";
 
-import { Metrics } from "./sentry/metrics";
+import { Metrics } from "./metrics";
 import {
   createProxyFromRequest,
   normalizeWsData,
diff --git a/apps/api/src/sentry/metrics.ts b/apps/api/src/metrics.ts
similarity index 91%
rename from apps/api/src/sentry/metrics.ts
rename to apps/api/src/metrics.ts
index b9a857821b..5a93f5e66f 100644
--- a/apps/api/src/sentry/metrics.ts
+++ b/apps/api/src/metrics.ts
@@ -1,35 +1,43 @@
 import * as Sentry from "@sentry/bun";
 
-export const Metrics = {
+const billing = {
+  billingSync: (success: boolean, eventType: string) => {
+    Sentry.metrics.count("billing.sync", 1, {
+      attributes: { success: String(success), event_type: eventType },
+    });
+  },
+};
+
+const stt = {
   websocketConnected: (provider: string) => {
     Sentry.metrics.count("websocket.connected", 1, {
       attributes: { provider },
     });
   },
-
   websocketDisconnected: (provider: string, durationMs: number) => {
     Sentry.metrics.distribution("websocket.duration", durationMs, {
       unit: "millisecond",
       attributes: { provider },
     });
   },
+};
 
-  billingSync: (success: boolean, eventType: string) => {
-    Sentry.metrics.count("billing.sync", 1, {
-      attributes: { success: String(success), event_type: eventType },
-    });
-  },
-
+const llm = {
   chatCompletion: (streaming: boolean, statusCode: number) => {
     Sentry.metrics.count("chat.completion", 1, {
       attributes: { streaming: String(streaming), status: String(statusCode) },
     });
   },
+};
 
+export const Metrics = {
+  ...stt,
+  ...llm,
   upstreamLatency: (provider: string, durationMs: number) => {
     Sentry.metrics.distribution("upstream.latency", durationMs, {
       unit: "millisecond",
      attributes: { provider },
     });
   },
+  ...billing,
 };
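
The metrics change is a pure regrouping: the merged `Metrics` object should expose the same five helpers as before. A sketch under the same assumptions (Bun test runner, assumed paths) that would catch a key accidentally dropped during the spread-merge:

```ts
import { expect, test } from "bun:test";

import { Metrics } from "../src/metrics";

test("Metrics keeps its full surface after the domain regrouping", () => {
  // One helper per domain group (stt, llm, billing) plus upstreamLatency.
  const expected = [
    "websocketConnected",
    "websocketDisconnected",
    "chatCompletion",
    "upstreamLatency",
    "billingSync",
  ] as const;
  for (const name of expected) {
    expect(typeof Metrics[name]).toBe("function");
  }
});
```
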
+export * from "./stripe"; diff --git a/apps/api/src/load-test-auth.ts b/apps/api/src/middleware/load-test-auth.ts similarity index 93% rename from apps/api/src/load-test-auth.ts rename to apps/api/src/middleware/load-test-auth.ts index 531c62cfda..64d8a9fc2c 100644 --- a/apps/api/src/load-test-auth.ts +++ b/apps/api/src/middleware/load-test-auth.ts @@ -1,6 +1,6 @@ import { createMiddleware } from "hono/factory"; -import { env } from "./env"; +import { env } from "../env"; export const loadTestOverride = createMiddleware<{ Variables: { supabaseUserId: string }; diff --git a/apps/api/src/middleware/sentry.ts b/apps/api/src/middleware/sentry.ts new file mode 100644 index 0000000000..51ae1c3471 --- /dev/null +++ b/apps/api/src/middleware/sentry.ts @@ -0,0 +1,29 @@ +import * as Sentry from "@sentry/bun"; +import { createMiddleware } from "hono/factory"; + +import type { AppBindings } from "../hono-bindings"; + +export const sentryMiddleware = createMiddleware( + async (c, next) => { + const sentryTrace = c.req.header("sentry-trace"); + const baggage = c.req.header("baggage"); + + return Sentry.continueTrace({ sentryTrace, baggage }, async () => { + return Sentry.startSpan( + { + name: `${c.req.method} ${c.req.path}`, + op: "http.server", + attributes: { + "http.method": c.req.method, + "http.url": c.req.url, + }, + }, + async (span) => { + c.set("sentrySpan", span); + await next(); + span.setAttribute("http.status_code", c.res.status); + }, + ); + }); + }, +); diff --git a/apps/api/src/stripe.ts b/apps/api/src/middleware/stripe.ts similarity index 88% rename from apps/api/src/stripe.ts rename to apps/api/src/middleware/stripe.ts index a7cfcc7777..dcc12a4494 100644 --- a/apps/api/src/stripe.ts +++ b/apps/api/src/middleware/stripe.ts @@ -2,11 +2,8 @@ import * as Sentry from "@sentry/bun"; import { createMiddleware } from "hono/factory"; import Stripe from "stripe"; -import { env } from "./env"; - -export const stripe = new Stripe(env.STRIPE_SECRET_KEY, { - apiVersion: "2025-10-29.clover", -}); +import { env } from "../env"; +import { stripe } from "../integration/stripe"; const cryptoProvider = Stripe.createSubtleCryptoProvider(); diff --git a/apps/api/src/supabase.ts b/apps/api/src/middleware/supabase.ts similarity index 78% rename from apps/api/src/supabase.ts rename to apps/api/src/middleware/supabase.ts index 7fb1b11ca6..567dba3d47 100644 --- a/apps/api/src/supabase.ts +++ b/apps/api/src/middleware/supabase.ts @@ -1,14 +1,9 @@ import { createClient } from "@supabase/supabase-js"; import { createMiddleware } from "hono/factory"; -import { env } from "./env"; +import { env } from "../env"; -export const supabaseAdmin = createClient( - env.SUPABASE_URL, - env.SUPABASE_SERVICE_ROLE_KEY, -); - -export const requireSupabaseAuth = createMiddleware<{ +export const supabaseAuthMiddleware = createMiddleware<{ Variables: { supabaseUserId: string }; }>(async (c, next) => { const authHeader = c.req.header("Authorization"); diff --git a/apps/api/src/routes.ts b/apps/api/src/routes.ts index 9a1087ee6f..fa4c5f865c 100644 --- a/apps/api/src/routes.ts +++ b/apps/api/src/routes.ts @@ -1,20 +1,12 @@ -import { OpenAI as PostHogOpenAI } from "@posthog/ai"; import * as Sentry from "@sentry/bun"; import { Hono } from "hono"; import { describeRoute } from "hono-openapi"; import { resolver, validator } from "hono-openapi/zod"; import { z } from "zod"; -import { env } from "./env"; import type { AppBindings } from "./hono-bindings"; -import { posthog } from "./posthog"; -import { Metrics } from "./sentry/metrics"; - 
diff --git a/apps/api/src/stripe.ts b/apps/api/src/middleware/stripe.ts
similarity index 88%
rename from apps/api/src/stripe.ts
rename to apps/api/src/middleware/stripe.ts
index a7cfcc7777..dcc12a4494 100644
--- a/apps/api/src/stripe.ts
+++ b/apps/api/src/middleware/stripe.ts
@@ -2,11 +2,8 @@ import * as Sentry from "@sentry/bun";
 import { createMiddleware } from "hono/factory";
 import Stripe from "stripe";
 
-import { env } from "./env";
-
-export const stripe = new Stripe(env.STRIPE_SECRET_KEY, {
-  apiVersion: "2025-10-29.clover",
-});
+import { env } from "../env";
+import { stripe } from "../integration/stripe";
 
 const cryptoProvider = Stripe.createSubtleCryptoProvider();
 
diff --git a/apps/api/src/supabase.ts b/apps/api/src/middleware/supabase.ts
similarity index 78%
rename from apps/api/src/supabase.ts
rename to apps/api/src/middleware/supabase.ts
index 7fb1b11ca6..567dba3d47 100644
--- a/apps/api/src/supabase.ts
+++ b/apps/api/src/middleware/supabase.ts
@@ -1,14 +1,9 @@
 import { createClient } from "@supabase/supabase-js";
 import { createMiddleware } from "hono/factory";
 
-import { env } from "./env";
+import { env } from "../env";
 
-export const supabaseAdmin = createClient(
-  env.SUPABASE_URL,
-  env.SUPABASE_SERVICE_ROLE_KEY,
-);
-
-export const requireSupabaseAuth = createMiddleware<{
+export const supabaseAuthMiddleware = createMiddleware<{
   Variables: { supabaseUserId: string };
 }>(async (c, next) => {
   const authHeader = c.req.header("Authorization");
diff --git a/apps/api/src/routes.ts b/apps/api/src/routes.ts
index 9a1087ee6f..fa4c5f865c 100644
--- a/apps/api/src/routes.ts
+++ b/apps/api/src/routes.ts
@@ -1,20 +1,12 @@
-import { OpenAI as PostHogOpenAI } from "@posthog/ai";
 import * as Sentry from "@sentry/bun";
 import { Hono } from "hono";
 import { describeRoute } from "hono-openapi";
 import { resolver, validator } from "hono-openapi/zod";
 import { z } from "zod";
 
-import { env } from "./env";
 import type { AppBindings } from "./hono-bindings";
-import { posthog } from "./posthog";
-import { Metrics } from "./sentry/metrics";
-
-const openai = new PostHogOpenAI({
-  baseURL: "https://openrouter.ai/api/v1",
-  apiKey: env.OPENROUTER_API_KEY,
-  posthog,
-});
+import { getModels, openai } from "./integration/openrouter";
+import { Metrics } from "./metrics";
 
 export const API_TAGS = {
   INTERNAL: "internal",
@@ -135,121 +127,102 @@ routes.post(
   validator("json", ChatCompletionRequestSchema),
   async (c) => {
     const requestBody = c.req.valid("json");
+    const span = c.get("sentrySpan");
 
-    return Sentry.startSpan(
-      { op: "http.client", name: "openrouter.chat.completions" },
-      async (span) => {
-        const toolChoice = requestBody.tool_choice;
-        const needsToolCalling =
-          Array.isArray(requestBody.tools) &&
-          !(typeof toolChoice === "string" && toolChoice === "none");
-
-        const modelsToUse = needsToolCalling
-          ? [
-              "moonshotai/kimi-k2-0905:exacto",
-              "anthropic/claude-haiku-4.5",
-              "openai/gpt-oss-120b:exacto",
-            ]
-          : ["moonshotai/kimi-k2-0905", "openai/gpt-5.1-chat"];
-
-        span.setAttribute("chat.tool_calling", needsToolCalling);
-        span.setAttribute("chat.streaming", requestBody.stream ?? false);
-
-        const {
-          model: _ignoredModel,
-          stream,
-          messages,
-          tools,
-          tool_choice,
-          temperature,
-          max_tokens,
-          ...restBody
-        } = requestBody;
+    const toolChoice = requestBody.tool_choice;
+    const needsToolCalling =
+      Array.isArray(requestBody.tools) &&
+      !(typeof toolChoice === "string" && toolChoice === "none");
 
-        const startTime = performance.now();
+    span?.setAttribute("chat.tool_calling", needsToolCalling);
+    span?.setAttribute("chat.streaming", requestBody.stream ?? false);
 
-        try {
-          const createParams = {
-            model: "",
-            messages,
-            tools,
-            tool_choice,
-            temperature,
-            max_tokens,
-          } as Parameters<typeof openai.chat.completions.create>[0];
-          const extraBody = {
-            ...restBody,
-            models: modelsToUse,
-            provider: { sort: "latency" },
-          };
-
-          if (stream) {
-            const streamResponse = await openai.chat.completions.create(
-              { ...createParams, stream: true },
-              { body: extraBody },
-            );
+    const {
+      model: _ignoredModel,
+      stream,
+      messages,
+      tools,
+      tool_choice,
+      temperature,
+      max_tokens,
+      ...restBody
+    } = requestBody;
 
-            Metrics.upstreamLatency(
-              "openrouter",
-              performance.now() - startTime,
-            );
-            Metrics.chatCompletion(true, 200);
-            span.setAttribute("http.status_code", 200);
-
-            const encoder = new TextEncoder();
-            const readableStream = new ReadableStream({
-              async start(controller) {
-                try {
-                  for await (const chunk of streamResponse) {
-                    const data = `data: ${JSON.stringify(chunk)}\n\n`;
-                    controller.enqueue(encoder.encode(data));
-                  }
-                  controller.enqueue(encoder.encode("data: [DONE]\n\n"));
-                  controller.close();
-                } catch (error) {
-                  Sentry.captureException(error, {
-                    tags: { streaming: true },
-                  });
-                  controller.error(error);
-                }
-              },
-            });
-
-            return new Response(readableStream, {
-              status: 200,
-              headers: {
-                "Content-Type": "text/event-stream",
-                "Cache-Control": "no-cache",
-                Connection: "keep-alive",
-              },
-            });
-          }
+    const startTime = performance.now();
 
-          const response = await openai.chat.completions.create(
-            { ...createParams, stream: false },
-            { body: extraBody },
-          );
+    try {
+      const createParams = {
+        model: "openrouter/auto",
+        messages,
+        tools,
+        tool_choice,
+        temperature,
+        max_tokens,
+      } as Parameters<typeof openai.chat.completions.create>[0];
+      const extraBody = {
+        ...restBody,
+        models: getModels(needsToolCalling),
+        provider: { sort: "latency" },
+      };
+
+      if (stream) {
+        const streamResponse = await openai.chat.completions.create(
+          { ...createParams, stream: true },
+          { body: extraBody },
+        );
+
+        Metrics.upstreamLatency("openrouter", performance.now() - startTime);
+
+        const encoder = new TextEncoder();
+        const readableStream = new ReadableStream({
+          async start(controller) {
+            try {
+              for await (const chunk of streamResponse) {
+                const data = `data: ${JSON.stringify(chunk)}\n\n`;
+                controller.enqueue(encoder.encode(data));
+              }
+              controller.enqueue(encoder.encode("data: [DONE]\n\n"));
+              controller.close();
+              Metrics.chatCompletion(true, 200);
+            } catch (error) {
+              Metrics.chatCompletion(true, 500);
+              Sentry.captureException(error, {
+                tags: { streaming: true },
+              });
+              controller.error(error);
+            }
+          },
+        });
+
+        return new Response(readableStream, {
+          status: 200,
+          headers: {
+            "Content-Type": "text/event-stream",
+            "Cache-Control": "no-cache",
+            Connection: "keep-alive",
+          },
+        });
+      }
 
-          Metrics.upstreamLatency("openrouter", performance.now() - startTime);
-          Metrics.chatCompletion(false, 200);
-          span.setAttribute("http.status_code", 200);
+      const response = await openai.chat.completions.create(
+        { ...createParams, stream: false },
+        { body: extraBody },
+      );
 
-          return c.json(response, 200);
-        } catch (error) {
-          Metrics.upstreamLatency("openrouter", performance.now() - startTime);
-          const isAPIError =
-            error instanceof Error &&
-            "status" in error &&
-            typeof (error as { status?: number }).status === "number";
-          const status = isAPIError
-            ? (error as { status: number }).status
-            : 500;
-          Metrics.chatCompletion(stream ?? false, status);
-          span.setAttribute("http.status_code", status);
-          throw error;
-        }
-      },
-    );
+      Metrics.upstreamLatency("openrouter", performance.now() - startTime);
+      Metrics.chatCompletion(false, 200);
+
+      return c.json(response, 200);
+    } catch (error) {
+      Metrics.upstreamLatency("openrouter", performance.now() - startTime);
+      const isAPIError =
+        error instanceof Error &&
+        "status" in error &&
+        typeof (error as { status?: number }).status === "number";
+      const status = isAPIError ? (error as { status: number }).status : 500;
+      Metrics.chatCompletion(stream ?? false, status);
+      throw error;
+    }
   },
 );
 
@@ -297,14 +270,11 @@ routes.post(
     const { syncBillingForStripeEvent } = await import("./billing");
 
     const stripeEvent = c.get("stripeEvent");
+    const span = c.get("sentrySpan");
+    span?.setAttribute("stripe.event_type", stripeEvent.type);
 
     try {
-      await Sentry.startSpan(
-        { op: "billing.sync", name: `stripe.${stripeEvent.type}` },
-        async () => {
-          await syncBillingForStripeEvent(stripeEvent);
-        },
-      );
+      await syncBillingForStripeEvent(stripeEvent);
       Metrics.billingSync(true, stripeEvent.type);
     } catch (error) {
       Metrics.billingSync(false, stripeEvent.type);
diff --git a/apps/api/src/routes/constants.ts b/apps/api/src/routes/constants.ts
new file mode 100644
index 0000000000..2675a14cd6
--- /dev/null
+++ b/apps/api/src/routes/constants.ts
@@ -0,0 +1,6 @@
+export const API_TAGS = {
+  INTERNAL: "internal",
+  APP: "app",
+  WEBHOOK: "webhook",
+  PUBLIC: "public",
+} as const;
diff --git a/apps/api/src/routes/health.ts b/apps/api/src/routes/health.ts
new file mode 100644
index 0000000000..a67fea16b3
--- /dev/null
+++ b/apps/api/src/routes/health.ts
@@ -0,0 +1,33 @@
+import { Hono } from "hono";
+import { describeRoute } from "hono-openapi";
+import { resolver } from "hono-openapi/zod";
+import { z } from "zod";
+
+import type { AppBindings } from "../hono-bindings";
+import { API_TAGS } from "./constants";
+
+const HealthResponseSchema = z.object({
+  status: z.string(),
+});
+
+export const health = new Hono<AppBindings>();
+
+health.get(
+  "/",
+  describeRoute({
+    tags: [API_TAGS.INTERNAL],
+    summary: "Health check",
+    description: "Returns the health status of the API server.",
+    responses: {
+      200: {
+        description: "API is healthy",
+        content: {
+          "application/json": {
+            schema: resolver(HealthResponseSchema),
+          },
+        },
+      },
+    },
+  }),
+  (c) => c.json({ status: "ok" }, 200),
+);
diff --git a/apps/api/src/routes/index.ts b/apps/api/src/routes/index.ts
new file mode 100644
index 0000000000..f21102740d
--- /dev/null
+++ b/apps/api/src/routes/index.ts
@@ -0,0 +1,16 @@
+import { Hono } from "hono";
+
+import type { AppBindings } from "../hono-bindings";
+import { health } from "./health";
+import { llm } from "./llm";
+import { stt } from "./stt";
+import { webhook } from "./webhook";
+
+export { API_TAGS } from "./constants";
+
+export const routes = new Hono<AppBindings>();
+
+routes.route("/health", health);
+routes.route("/chat", llm);
+routes.route("/", stt);
+routes.route("/webhook", webhook);
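
With the routers composed, the tree can be exercised end to end through Hono's built-in `request` helper; the health route is the only one reachable without auth or Stripe-signature middleware. A sketch, assuming the same paths and Bun's test runner:

```ts
import { expect, test } from "bun:test";

import { routes } from "../src/routes";

test("GET /health responds ok through the composed router", async () => {
  const res = await routes.request("/health");
  expect(res.status).toBe(200);
  expect(await res.json()).toEqual({ status: "ok" });
});
```
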
+ "/completions", + describeRoute({ + tags: [API_TAGS.APP], + summary: "Chat completions", + description: + "OpenAI-compatible chat completions endpoint. Proxies requests to OpenRouter with automatic model selection. Requires Supabase authentication.", + security: [{ Bearer: [] }], + responses: { + 200: { + description: "Chat completion response (streamed or non-streamed)", + }, + 401: { + description: "Unauthorized - missing or invalid authentication", + content: { + "text/plain": { + schema: { type: "string", example: "unauthorized" }, + }, + }, + }, + }, + }), + validator("json", ChatCompletionRequestSchema), + async (c) => { + const requestBody = c.req.valid("json"); + const span = c.get("sentrySpan"); + + const toolChoice = requestBody.tool_choice; + const needsToolCalling = + Array.isArray(requestBody.tools) && + !(typeof toolChoice === "string" && toolChoice === "none"); + + span?.setAttribute("chat.tool_calling", needsToolCalling); + span?.setAttribute("chat.streaming", requestBody.stream ?? false); + + const { + model: _ignoredModel, + stream, + messages, + tools, + tool_choice, + temperature, + max_tokens, + ...restBody + } = requestBody; + + const startTime = performance.now(); + + try { + const createParams = { + model: "openrouter/auto", + messages, + tools, + tool_choice, + temperature, + max_tokens, + } as Parameters[0]; + const extraBody = { + ...restBody, + models: getModels(needsToolCalling), + provider: { sort: "latency" }, + }; + + if (stream) { + const streamResponse = await openai.chat.completions.create( + { ...createParams, stream: true }, + { body: extraBody }, + ); + + Metrics.upstreamLatency("openrouter", performance.now() - startTime); + + const encoder = new TextEncoder(); + const readableStream = new ReadableStream({ + async start(controller) { + try { + for await (const chunk of streamResponse) { + const data = `data: ${JSON.stringify(chunk)}\n\n`; + controller.enqueue(encoder.encode(data)); + } + controller.enqueue(encoder.encode("data: [DONE]\n\n")); + controller.close(); + Metrics.chatCompletion(true, 200); + } catch (error) { + Metrics.chatCompletion(true, 500); + Sentry.captureException(error, { + tags: { streaming: true }, + }); + controller.error(error); + } + }, + }); + + return new Response(readableStream, { + status: 200, + headers: { + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache", + Connection: "keep-alive", + }, + }); + } + + const response = await openai.chat.completions.create( + { ...createParams, stream: false }, + { body: extraBody }, + ); + + Metrics.upstreamLatency("openrouter", performance.now() - startTime); + Metrics.chatCompletion(false, 200); + + return c.json(response, 200); + } catch (error) { + Metrics.upstreamLatency("openrouter", performance.now() - startTime); + const isAPIError = + error instanceof Error && + "status" in error && + typeof (error as { status?: number }).status === "number"; + const status = isAPIError ? (error as { status: number }).status : 500; + Metrics.chatCompletion(stream ?? 
diff --git a/apps/api/src/routes/stt.ts b/apps/api/src/routes/stt.ts
new file mode 100644
index 0000000000..204b640493
--- /dev/null
+++ b/apps/api/src/routes/stt.ts
@@ -0,0 +1,66 @@
+import { Hono } from "hono";
+import { describeRoute } from "hono-openapi";
+import { resolver } from "hono-openapi/zod";
+import { z } from "zod";
+
+import type { AppBindings } from "../hono-bindings";
+import { API_TAGS } from "./constants";
+
+const WebSocketErrorSchema = z.object({
+  error: z.string(),
+  detail: z.string().optional(),
+});
+
+export const stt = new Hono<AppBindings>();
+
+stt.get(
+  "/listen",
+  describeRoute({
+    tags: [API_TAGS.APP],
+    summary: "Speech-to-text WebSocket",
+    description:
+      "WebSocket endpoint for real-time speech-to-text transcription via Deepgram. Requires Supabase authentication in production.",
+    security: [{ Bearer: [] }],
+    responses: {
+      101: {
+        description: "WebSocket upgrade successful",
+      },
+      400: {
+        description: "WebSocket upgrade failed",
+        content: {
+          "application/json": {
+            schema: resolver(WebSocketErrorSchema),
+          },
+        },
+      },
+      401: {
+        description: "Unauthorized - missing or invalid authentication",
+        content: {
+          "text/plain": {
+            schema: { type: "string", example: "unauthorized" },
+          },
+        },
+      },
+      502: {
+        description: "Upstream STT service unavailable",
+        content: {
+          "application/json": {
+            schema: resolver(WebSocketErrorSchema),
+          },
+        },
+      },
+      504: {
+        description: "Upstream STT service timeout",
+        content: {
+          "application/json": {
+            schema: resolver(WebSocketErrorSchema),
+          },
+        },
+      },
+    },
+  }),
+  async (c, next) => {
+    const { listenSocketHandler } = await import("../listen");
+    return listenSocketHandler(c, next);
+  },
+);
diff --git a/apps/api/src/routes/webhook.ts b/apps/api/src/routes/webhook.ts
new file mode 100644
index 0000000000..5b18002370
--- /dev/null
+++ b/apps/api/src/routes/webhook.ts
@@ -0,0 +1,81 @@
+import * as Sentry from "@sentry/bun";
+import { Hono } from "hono";
+import { describeRoute } from "hono-openapi";
+import { resolver, validator } from "hono-openapi/zod";
+import { z } from "zod";
+
+import type { AppBindings } from "../hono-bindings";
+import { Metrics } from "../metrics";
+import { API_TAGS } from "./constants";
+
+const WebhookSuccessSchema = z.object({
+  ok: z.boolean(),
+});
+
+const WebhookErrorSchema = z.object({
+  error: z.string(),
+});
+
+export const webhook = new Hono<AppBindings>();
+
+webhook.post(
+  "/stripe",
+  describeRoute({
+    tags: [API_TAGS.WEBHOOK],
+    summary: "Stripe webhook",
+    description:
+      "Handles Stripe webhook events for billing synchronization. Requires valid Stripe signature.",
+    responses: {
+      200: {
+        description: "Webhook processed successfully",
+        content: {
+          "application/json": {
+            schema: resolver(WebhookSuccessSchema),
+          },
+        },
+      },
+      400: {
+        description: "Invalid or missing Stripe signature",
+        content: {
+          "text/plain": {
+            schema: { type: "string", example: "missing_stripe_signature" },
+          },
+        },
+      },
+      500: {
+        description: "Internal server error during billing sync",
+        content: {
+          "application/json": {
+            schema: resolver(WebhookErrorSchema),
+          },
+        },
+      },
+    },
+  }),
+  validator(
+    "header",
+    z.object({
+      "stripe-signature": z.string(),
+    }),
+  ),
+  async (c) => {
+    const { syncBillingForStripeEvent } = await import("../billing");
+
+    const stripeEvent = c.get("stripeEvent");
+    const span = c.get("sentrySpan");
+    span?.setAttribute("stripe.event_type", stripeEvent.type);
+
+    try {
+      await syncBillingForStripeEvent(stripeEvent);
+      Metrics.billingSync(true, stripeEvent.type);
+    } catch (error) {
+      Metrics.billingSync(false, stripeEvent.type);
+      Sentry.captureException(error, {
+        tags: { webhook: "stripe", event_type: stripeEvent.type },
+      });
+      return c.json({ error: "stripe_billing_sync_failed" }, 500);
+    }
+
+    return c.json({ ok: true }, 200);
+  },
+);
diff --git a/apps/api/src/sentry/middleware.ts b/apps/api/src/sentry/middleware.ts
deleted file mode 100644
index 2a6e59bd12..0000000000
--- a/apps/api/src/sentry/middleware.ts
+++ /dev/null
@@ -1,24 +0,0 @@
-import * as Sentry from "@sentry/bun";
-import { createMiddleware } from "hono/factory";
-
-export const sentryMiddleware = createMiddleware(async (c, next) => {
-  const sentryTrace = c.req.header("sentry-trace");
-  const baggage = c.req.header("baggage");
-
-  return Sentry.continueTrace({ sentryTrace, baggage }, async () => {
-    return Sentry.startSpan(
-      {
-        name: `${c.req.method} ${c.req.path}`,
-        op: "http.server",
-        attributes: {
-          "http.method": c.req.method,
-          "http.url": c.req.url,
-        },
-      },
-      async (span) => {
-        await next();
-        span.setAttribute("http.status_code", c.res.status);
-      },
-    );
-  });
-});
"", + main.STORE_ID, + ) as AIProviderStorage | undefined; + + return useMemo( + () => + resolveLLMConnection({ + providerId: current_llm_provider, + modelId: current_llm_model, + providerConfig, + session: auth?.session, + isPro: billing.isPro, + }), + [ + auth, + billing.isPro, + current_llm_model, + current_llm_provider, + providerConfig, + ], + ); +}; + +export const useLLMConnectionStatus = (): LLMConnectionStatus => { + const { status } = useLLMConnection(); + return status; +}; + +const resolveLLMConnection = (params: { + providerId: string | undefined; + modelId: string | undefined; + providerConfig: AIProviderStorage | undefined; + session: { access_token: string } | null | undefined; + isPro: boolean; +}): LLMConnectionResult => { + const { + providerId: rawProviderId, + modelId, + providerConfig, + session, + isPro, + } = params; + + if (!rawProviderId) { + return { + conn: null, + status: { status: "pending", reason: "missing_provider" }, + }; + } + + const providerId = rawProviderId as ProviderId; + + if (!modelId) { + return { + conn: null, + status: { status: "pending", reason: "missing_model", providerId }, + }; + } + + const providerDefinition = PROVIDERS.find((p) => p.id === rawProviderId); + + if (!providerDefinition) { + return { + conn: null, + status: { + status: "error", + reason: "provider_not_found", + providerId: rawProviderId, + }, + }; + } + + if (providerId === "hyprnote") { + if (!session) { + return { + conn: null, + status: { status: "error", reason: "unauthenticated", providerId }, + }; + } - return useMemo(() => { - if (!conn) { - return null; + if (!isPro) { + return { + conn: null, + status: { status: "error", reason: "not_pro", providerId }, + }; } - if (conn.providerId === "hyprnote") { - const hyprnoteProvider = createOpenAICompatible({ + return { + conn: { + providerId, + modelId, + baseUrl: `${env.VITE_API_URL}${providerDefinition.baseUrl || ""}`, + apiKey: session.access_token, + }, + status: { status: "success", providerId, isHosted: true }, + }; + } + + const baseUrl = + providerConfig?.base_url?.trim() || + providerDefinition.baseUrl?.trim() || + ""; + const apiKey = providerConfig?.api_key?.trim() || ""; + + const missing: Array<"base_url" | "api_key"> = []; + if (!baseUrl) { + missing.push("base_url"); + } + if (providerDefinition.apiKey && !apiKey) { + missing.push("api_key"); + } + + if (missing.length > 0) { + return { + conn: null, + status: { + status: "error", + reason: "missing_config", + providerId, + missing, + }, + }; + } + + return { + conn: { providerId, modelId, baseUrl, apiKey }, + status: { status: "success", providerId, isHosted: false }, + }; +}; + +const wrapWithThinkingMiddleware = (model: Exclude) => { + return wrapLanguageModel({ + model, + middleware: [ + extractReasoningMiddleware({ tagName: "think" }), + extractReasoningMiddleware({ tagName: "thinking" }), + ], + }); +}; + +const createLanguageModel = ( + conn: LLMConnectionInfo, +): Exclude => { + switch (conn.providerId) { + case "hyprnote": { + const provider = createOpenAICompatible({ fetch: tracedFetch, name: "hyprnote", baseURL: conn.baseUrl, @@ -67,14 +217,11 @@ export const useLanguageModel = (): Exclude | null => { Authorization: `Bearer ${conn.apiKey}`, }, }); - - return wrapWithThinkingMiddleware( - hyprnoteProvider.chatModel(conn.modelId), - ); + return wrapWithThinkingMiddleware(provider.chatModel(conn.modelId)); } - if (conn.providerId === "anthropic") { - const anthropicProvider = createAnthropic({ + case "anthropic": { + const provider = 
@@ -67,14 +217,11 @@ export const useLanguageModel = (): Exclude<LanguageModel, string> | null => {
           Authorization: `Bearer ${conn.apiKey}`,
         },
       });
-
-      return wrapWithThinkingMiddleware(
-        hyprnoteProvider.chatModel(conn.modelId),
-      );
+      return wrapWithThinkingMiddleware(provider.chatModel(conn.modelId));
     }
 
-    if (conn.providerId === "anthropic") {
-      const anthropicProvider = createAnthropic({
+    case "anthropic": {
+      const provider = createAnthropic({
         fetch: tauriFetch,
         apiKey: conn.apiKey,
         headers: {
@@ -82,22 +229,20 @@ export const useLanguageModel = (): Exclude<LanguageModel, string> | null => {
           "anthropic-dangerous-direct-browser-access": "true",
         },
       });
-
-      return wrapWithThinkingMiddleware(anthropicProvider(conn.modelId));
+      return wrapWithThinkingMiddleware(provider(conn.modelId));
     }
 
-    if (conn.providerId === "google_generative_ai") {
-      const googleProvider = createGoogleGenerativeAI({
+    case "google_generative_ai": {
+      const provider = createGoogleGenerativeAI({
         fetch: tauriFetch,
         baseURL: conn.baseUrl,
         apiKey: conn.apiKey,
       });
-
-      return wrapWithThinkingMiddleware(googleProvider(conn.modelId));
+      return wrapWithThinkingMiddleware(provider(conn.modelId));
     }
 
-    if (conn.providerId === "openrouter") {
-      const openRouterProvider = createOpenRouter({
+    case "openrouter": {
+      const provider = createOpenRouter({
         fetch: tauriFetch,
         apiKey: conn.apiKey,
         extraBody: {
@@ -107,173 +252,28 @@ export const useLanguageModel = (): Exclude<LanguageModel, string> | null => {
           },
         },
       });
-
-      return wrapWithThinkingMiddleware(openRouterProvider(conn.modelId));
+      return wrapWithThinkingMiddleware(provider(conn.modelId));
     }
 
-    if (conn.providerId === "openai") {
-      const openAIProvider = createOpenAI({
+    case "openai": {
+      const provider = createOpenAI({
         fetch: tauriFetch,
         apiKey: conn.apiKey,
       });
-
-      return wrapWithThinkingMiddleware(openAIProvider(conn.modelId));
+      return wrapWithThinkingMiddleware(provider(conn.modelId));
     }
 
"", - main.STORE_ID, - ) as AIProviderStorage | undefined; - - return useMemo(() => { - if (!current_llm_provider) { - return { - conn: null, - status: { status: "pending", reason: "missing_provider" }, - }; - } - - const providerId = current_llm_provider as ProviderId; - - if (!current_llm_model) { - return { - conn: null, - status: { - status: "pending", - reason: "missing_model", - providerId, - }, - }; - } - - const providerDefinition = PROVIDERS.find( - (provider) => provider.id === current_llm_provider, - ); - - if (!providerDefinition) { - return { - conn: null, - status: { - status: "error", - reason: "provider_not_found", - providerId: current_llm_provider, - }, + default: { + const config: Parameters[0] = { + fetch: tauriFetch, + name: conn.providerId, + baseURL: conn.baseUrl, }; - } - - if (providerId === "hyprnote") { - if (!auth?.session) { - return { - conn: null, - status: { status: "error", reason: "unauthenticated", providerId }, - }; + if (conn.apiKey) { + config.apiKey = conn.apiKey; } - - if (!billing.isPro) { - return { - conn: null, - status: { status: "error", reason: "not_pro", providerId }, - }; - } - - const conn: LLMConnectionInfo = { - providerId, - modelId: current_llm_model, - baseUrl: `${env.VITE_API_URL}`, - apiKey: auth.session.access_token, - }; - - return { - conn, - status: { status: "success", providerId, isHosted: true }, - }; - } - - const baseUrl = - providerConfig?.base_url?.trim() || - providerDefinition.baseUrl?.trim() || - ""; - const apiKey = providerConfig?.api_key?.trim() || ""; - - const missing: Array<"base_url" | "api_key"> = []; - - if (!baseUrl) { - missing.push("base_url"); - } - - if (providerDefinition.apiKey && !apiKey) { - missing.push("api_key"); + const provider = createOpenAICompatible(config); + return wrapWithThinkingMiddleware(provider.chatModel(conn.modelId)); } - - if (missing.length > 0) { - return { - conn: null, - status: { - status: "error", - reason: "missing_config", - providerId, - missing, - }, - }; - } - - const conn: LLMConnectionInfo = { - providerId, - modelId: current_llm_model, - baseUrl, - apiKey, - }; - - return { - conn, - status: { status: "success", providerId, isHosted: false }, - }; - }, [ - auth, - billing.isPro, - current_llm_model, - current_llm_provider, - providerConfig, - ]); -}; - -export const useLLMConnectionStatus = (): LLMConnectionStatus => { - const { status } = useLLMConnection(); - return status; -}; - -const wrapWithThinkingMiddleware = (model: Exclude) => { - return wrapLanguageModel({ - model, - middleware: [ - extractReasoningMiddleware({ tagName: "think" }), - extractReasoningMiddleware({ tagName: "thinking" }), - ], - }); + } };