-
Notifications
You must be signed in to change notification settings - Fork 52
Fix Observability: trace quality, cost reduction, and new metrics #1133
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
73729a6
ae8a997
61ed616
a5060ad
3b6c792
36f17b8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,68 +1,36 @@ | ||
| import * as log from "@std/log"; | ||
| import { Logger } from "@std/log/logger"; | ||
| import { Context, context } from "../../deco.ts"; | ||
| import denoJSON from "../../deno.json" with { type: "json" }; | ||
| import { | ||
| BatchSpanProcessor, | ||
| diag, | ||
| DiagConsoleLogger, | ||
| DiagLogLevel, | ||
| FetchInstrumentation, | ||
| NodeTracerProvider, | ||
| opentelemetry, | ||
| OTLPTraceExporter, | ||
| ParentBasedSampler, | ||
| registerInstrumentations, | ||
| Resource, | ||
| SemanticResourceAttributes, | ||
| } from "../../deps.ts"; | ||
| import { DenoRuntimeInstrumentation } from "./instrumentation/deno-runtime.ts"; | ||
|
|
||
| if (Deno.env.has("OTEL_DIAG")) { | ||
| diag.setLogger(new DiagConsoleLogger(), DiagLogLevel.DEBUG); | ||
| } | ||
| import "./instrumentation/deno-runtime.ts"; | ||
| import { DebugSampler } from "./samplers/debug.ts"; | ||
| import { type SamplingOptions, URLBasedSampler } from "./samplers/urlBased.ts"; | ||
|
|
||
| import { ENV_SITE_NAME } from "../../engine/decofile/constants.ts"; | ||
| import { safeImportResolve } from "../../engine/importmap/builder.ts"; | ||
| import { FilteringSpanProcessor } from "./processors/filtering.ts"; | ||
| import { OTEL_IS_ENABLED, resource } from "./resource.ts"; | ||
| import { OpenTelemetryHandler } from "./logger.ts"; | ||
|
|
||
| const tryGetVersionOf = (pkg: string) => { | ||
| try { | ||
| const [_, ver] = safeImportResolve(pkg).split("@"); | ||
| return ver.substring(0, ver.length - 1); | ||
| } catch { | ||
| return undefined; | ||
| } | ||
| }; | ||
| const apps_ver = tryGetVersionOf("apps/") ?? | ||
| tryGetVersionOf("deco-sites/std/") ?? "_"; | ||
|
|
||
| export const resource = Resource.default().merge( | ||
| new Resource({ | ||
| [SemanticResourceAttributes.SERVICE_NAME]: Deno.env.get(ENV_SITE_NAME) ?? | ||
| "deco", | ||
| [SemanticResourceAttributes.SERVICE_VERSION]: | ||
| Context.active().deploymentId ?? | ||
| Deno.hostname(), | ||
| [SemanticResourceAttributes.SERVICE_INSTANCE_ID]: crypto.randomUUID(), | ||
| [SemanticResourceAttributes.CLOUD_PROVIDER]: context.platform, | ||
| "deco.runtime.version": denoJSON.version, | ||
| "deco.apps.version": apps_ver, | ||
| [SemanticResourceAttributes.CLOUD_REGION]: Deno.env.get("DENO_REGION") ?? | ||
| "unknown", | ||
| [SemanticResourceAttributes.DEPLOYMENT_ENVIRONMENT]: Deno.env.get( | ||
| "DECO_ENV_NAME", | ||
| ) | ||
| ? `env-${Deno.env.get("DECO_ENV_NAME")}` | ||
| : "production", | ||
| }), | ||
| ); | ||
|
|
||
| const loggerName = "deco-logger"; | ||
| export const OTEL_IS_ENABLED: boolean = Deno.env.has( | ||
| "OTEL_EXPORTER_OTLP_ENDPOINT", | ||
| ); | ||
| export const logger: Logger = new Logger(loggerName, "INFO", { | ||
| handlers: [ | ||
| ...OTEL_IS_ENABLED | ||
| ? [ | ||
| new OpenTelemetryHandler("INFO", { | ||
| resourceAttributes: resource.attributes, | ||
| detectResources: false, | ||
| }), | ||
| ] | ||
| : [new log.ConsoleHandler("INFO")], | ||
|
|
@@ -82,6 +50,7 @@ registerInstrumentations({ | |
| // @ts-ignore: no idea why this is failing, but it should work | ||
| new FetchInstrumentation( | ||
| { | ||
| ignoreUrls: [/127\.0\.0\.1/, /localhost/], | ||
| applyCustomAttributesOnSpan: ( | ||
| span, | ||
| _req, | ||
|
|
@@ -101,7 +70,6 @@ registerInstrumentations({ | |
| }, | ||
| }, | ||
| ), | ||
| new DenoRuntimeInstrumentation(), | ||
| ], | ||
| }); | ||
|
|
||
|
|
@@ -125,8 +93,9 @@ const parseSamplingOptions = (): SamplingOptions | undefined => { | |
| } | ||
| }; | ||
|
|
||
| const samplingOptions = parseSamplingOptions(); | ||
| const debugSampler = new DebugSampler( | ||
| new URLBasedSampler(parseSamplingOptions()), | ||
| new URLBasedSampler(samplingOptions), | ||
| ); | ||
| const provider = new NodeTracerProvider({ | ||
| resource: resource, | ||
|
|
@@ -137,17 +106,36 @@ const provider = new NodeTracerProvider({ | |
| ), | ||
| }); | ||
|
|
||
| if (OTEL_IS_ENABLED) { | ||
| // Deno 2.2+ has built-in OTel support via OTEL_DENO=true. | ||
| // If enabled, it sets its own global TracerProvider and instruments fetch/console, | ||
| // which would conflict with our manual setup (double spans, double logs). | ||
| // Skip provider.register() when Deno's native OTel is active. | ||
| const DENO_OTEL_ACTIVE = Deno.env.get("OTEL_DENO") === "true"; | ||
|
|
||
|
|
||
| if (OTEL_IS_ENABLED && !DENO_OTEL_ACTIVE) { | ||
| const traceExporter = new OTLPTraceExporter(); | ||
| // @ts-ignore: no idea why this is failing, but it should work | ||
| provider.addSpanProcessor(new BatchSpanProcessor(traceExporter)); | ||
| provider.addSpanProcessor( | ||
| new FilteringSpanProcessor( | ||
| // @ts-ignore: no idea why this is failing, but it should work | ||
| new BatchSpanProcessor(traceExporter), | ||
| samplingOptions, | ||
| ), | ||
| ); | ||
|
|
||
| provider.register(); | ||
|
|
||
| addEventListener("unload", () => { | ||
| provider.shutdown().catch(() => {}); | ||
| }); | ||
| } | ||
|
Comment on lines
+109
to
131
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Inconsistent The tracer provider registration correctly skips when Consider applying the same guard in // In metrics.ts
if (OTEL_IS_ENABLED && !DENO_OTEL_ACTIVE) {
// ... metric exporter setup and unload listener
}#!/bin/bash
# Check if metrics.ts has DENO_OTEL_ACTIVE guard
rg -n "DENO_OTEL_ACTIVE|OTEL_DENO" observability/otel/metrics.ts🤖 Prompt for AI Agents |
||
|
|
||
| export const tracer = opentelemetry.trace.getTracer( | ||
| "deco-tracer", | ||
| ); | ||
| // Use provider.getTracer directly (not via global API) to ensure spans | ||
| // always go through our FilteringSpanProcessor, even if another TracerProvider | ||
| // overrides the global after our provider.register() call. | ||
| export const tracer = OTEL_IS_ENABLED && !DENO_OTEL_ACTIVE | ||
| ? provider.getTracer("deco-tracer") | ||
| : opentelemetry.trace.getTracer("deco-tracer"); | ||
|
|
||
| export const tracerIsRecording = () => | ||
| opentelemetry.trace.getActiveSpan()?.isRecording() ?? false; | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,69 +1,54 @@ | ||
| /** | ||
| * Heavily inspired from unlicensed code: https://github.com/cloudydeno/deno-observability/blob/main/instrumentation/deno-runtime.ts | ||
| */ | ||
| import { | ||
| type Attributes, | ||
| InstrumentationBase, | ||
| type InstrumentationConfig, | ||
| type ObservableCounter, | ||
| type ObservableGauge, | ||
| type ObservableResult, | ||
| type ObservableUpDownCounter, | ||
| ValueType, | ||
| } from "../../../deps.ts"; | ||
|
|
||
| export class DenoRuntimeInstrumentation extends InstrumentationBase { | ||
| readonly component: string = "deno-runtime"; | ||
| moduleName = this.component; | ||
|
|
||
| constructor(_config?: InstrumentationConfig) { | ||
| super("deno-runtime", "0.1.0", { enabled: false }); | ||
| import { meter } from "../metrics.ts"; | ||
|
|
||
| const memoryUsage: ObservableGauge<Attributes> = meter | ||
| .createObservableGauge("deno.memory_usage", { | ||
| unit: "By", | ||
| valueType: ValueType.DOUBLE, | ||
| description: "Deno process memory usage in bytes.", | ||
| }); | ||
|
|
||
| const openResources: ObservableUpDownCounter<Attributes> = meter | ||
| .createObservableUpDownCounter("deno.open_resources", { | ||
| valueType: ValueType.DOUBLE, | ||
| description: "Number of open resources of a particular type.", | ||
| }); | ||
|
|
||
| const gatherMemoryUsage = (x: ObservableResult<Attributes>) => { | ||
| const usage = Deno.memoryUsage(); | ||
| x.observe(usage.rss, { "deno.memory.type": "rss" }); | ||
| x.observe(usage.heapTotal, { "deno.memory.type": "heap_total" }); | ||
| x.observe(usage.heapUsed, { "deno.memory.type": "heap_used" }); | ||
| x.observe(usage.external, { "deno.memory.type": "external" }); | ||
| }; | ||
|
|
||
| const gatherOpenResources = (x: ObservableResult<Attributes>) => { | ||
| try { | ||
| // deno-lint-ignore no-explicit-any | ||
| const resources = (Deno as any).resources?.() as | ||
| | Record<string, string> | ||
| | undefined; | ||
| if (!resources) return; | ||
| const counts: Record<string, number> = {}; | ||
| for (const type of Object.values(resources)) { | ||
| counts[type] = (counts[type] ?? 0) + 1; | ||
| } | ||
| for (const [type, count] of Object.entries(counts)) { | ||
| x.observe(count, { "deno.resource.type": type }); | ||
| } | ||
| } catch { | ||
| // Deno.resources() may not be available in all environments | ||
| } | ||
| }; | ||
|
|
||
| metrics!: { | ||
| openResources: ObservableUpDownCounter<Attributes>; | ||
| memoryUsage: ObservableGauge<Attributes>; | ||
| dispatchedCtr: ObservableCounter<Attributes>; | ||
| inflightCtr: ObservableUpDownCounter<Attributes>; | ||
| }; | ||
|
|
||
| protected init() {} | ||
|
|
||
| private gatherMemoryUsage = (x: ObservableResult<Attributes>) => { | ||
| const usage = Deno.memoryUsage(); | ||
| x.observe(usage.rss, { "deno.memory.type": "rss" }); | ||
| x.observe(usage.heapTotal, { "deno.memory.type": "heap_total" }); | ||
| x.observe(usage.heapUsed, { "deno.memory.type": "heap_used" }); | ||
| x.observe(usage.external, { "deno.memory.type": "external" }); | ||
| }; | ||
| memoryUsage.addCallback(gatherMemoryUsage); | ||
| openResources.addCallback(gatherOpenResources); | ||
|
|
||
| override enable() { | ||
| this.metrics ??= { | ||
| openResources: this.meter | ||
| .createObservableUpDownCounter("deno.open_resources", { | ||
| valueType: ValueType.DOUBLE, | ||
| description: "Number of open resources of a particular type.", | ||
| }), | ||
| memoryUsage: this.meter | ||
| .createObservableGauge("deno.memory_usage", { | ||
| valueType: ValueType.DOUBLE, | ||
| }), | ||
| dispatchedCtr: this.meter | ||
| .createObservableCounter("deno.ops_dispatched", { | ||
| valueType: ValueType.DOUBLE, | ||
| description: "Total number of Deno op invocations.", | ||
| }), | ||
| inflightCtr: this.meter | ||
| .createObservableUpDownCounter("deno.ops_inflight", { | ||
| valueType: ValueType.DOUBLE, | ||
| description: "Number of currently-inflight Deno ops.", | ||
| }), | ||
| }; | ||
|
|
||
| this.metrics.memoryUsage.addCallback(this.gatherMemoryUsage); | ||
| } | ||
|
|
||
| override disable() { | ||
| this.metrics.memoryUsage.removeCallback(this.gatherMemoryUsage); | ||
| } | ||
| } | ||
| // Kept for backward compatibility — no longer needed but exported to avoid import errors | ||
| export class DenoRuntimeInstrumentation {} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
Repository: deco-cx/deco
Length of output: 1907
🏁 Script executed:
Repository: deco-cx/deco
Length of output: 1136
🏁 Script executed:
Repository: deco-cx/deco
Length of output: 1968
🏁 Script executed:
sed -n '223,343p' blocks/loader.tsRepository: deco-cx/deco
Length of output: 4122
Avoid emitting
resolver_latencywithstatus: undefined.The early return at line 271 (when
!revisionID) does not assign a value tostatusbefore exiting, causing the finally block to unconditionally record latency metrics withstatus: undefined. Additionally, if any exception occurs before a status assignment, the same issue occurs since there is no catch handler.Initialize
statusto a default value (e.g.,"unknown") and explicitly set it before early returns and in a catch block to ensure all code paths emit consistent metric attributes.🤖 Prompt for AI Agents