Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions packages/node/iii/src/telemetry-system/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ import { SharedEngineConnection } from './connection'
import { EngineSpanExporter, EngineMetricsExporter, EngineLogExporter } from './exporters'
import { extractTraceparent } from './context'
import { patchGlobalFetch, unpatchGlobalFetch } from './fetch-instrumentation'
import { parseIntegerEnv, parseNumberEnv } from './utils'

// Re-export everything from submodules
export * from './types'
Expand Down Expand Up @@ -166,11 +167,27 @@ export function initOtel(config: OtelConfig = {}): void {

// Initialize logs (always enabled when OTEL is enabled)
const logExporter = new EngineLogExporter(sharedConnection)
const logsScheduledDelayMillis =
config.logsFlushIntervalMs ??
parseNumberEnv(process.env.OTEL_LOGS_FLUSH_INTERVAL_MS, 0) ??
DEFAULT_OTEL_CONFIG.logsFlushIntervalMs
const logsMaxExportBatchSize =
config.logsBatchSize ??
parseIntegerEnv(process.env.OTEL_LOGS_BATCH_SIZE, 1) ??
DEFAULT_OTEL_CONFIG.logsBatchSize

loggerProvider = new LoggerProvider({ resource })
loggerProvider.addLogRecordProcessor(new BatchLogRecordProcessor(logExporter))
loggerProvider.addLogRecordProcessor(
new BatchLogRecordProcessor(logExporter, {
scheduledDelayMillis: logsScheduledDelayMillis,
maxExportBatchSize: logsMaxExportBatchSize,
}),
)
logger = loggerProvider.getLogger(serviceName)

console.debug('[OTel] Logs initialized')
console.debug(
`[OTel] Logs initialized: delay=${logsScheduledDelayMillis}ms, batch=${logsMaxExportBatchSize}`,
)
}

/**
Expand Down
6 changes: 6 additions & 0 deletions packages/node/iii/src/telemetry-system/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ export interface OtelConfig {
metricsEnabled?: boolean
/** Metrics export interval in milliseconds. Defaults to 60000 (60 seconds). */
metricsExportIntervalMs?: number
/** Log processor flush delay in milliseconds. Defaults to 100ms. */
logsFlushIntervalMs?: number
/** Maximum number of log records exported per batch. Defaults to 1. */
logsBatchSize?: number
/** Whether to auto-instrument globalThis.fetch calls. Defaults to true. Works on Node.js, Bun, and Deno. Set to false to disable. */
fetchInstrumentationEnabled?: boolean
/** Optional reconnection configuration for the WebSocket connection. */
Expand All @@ -79,6 +83,8 @@ export const DEFAULT_OTEL_CONFIG = {
engineWsUrl: 'ws://localhost:49134',
metricsEnabled: true,
metricsExportIntervalMs: 60000,
logsFlushIntervalMs: 100,
logsBatchSize: 1,
fetchInstrumentationEnabled: true,
} as const satisfies Partial<OtelConfig>

Expand Down
21 changes: 21 additions & 0 deletions packages/node/iii/src/telemetry-system/utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/**
* Parse a numeric environment variable with optional minimum bound.
*/
export function parseNumberEnv(value: string | undefined, minimum: number = 0): number | undefined {
if (value === undefined) return undefined
const parsed = Number(value)
if (!Number.isFinite(parsed) || parsed < minimum) return undefined
return parsed
Comment on lines +4 to +8
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

# Find the file and examine its structure
find . -type f -name "utils.ts" | grep -i telemetry

Repository: iii-hq/sdk

Length of output: 103


🏁 Script executed:

# Also check the overall structure of the packages/node directory
git ls-files packages/node/ | head -30

Repository: iii-hq/sdk

Length of output: 1209


🏁 Script executed:

# Search for parseNumberEnv usage across the codebase
rg -l "parseNumberEnv" --type=ts

Repository: iii-hq/sdk

Length of output: 149


🏁 Script executed:

# Read the utils.ts file to see the implementation
cat -n packages/node/iii/src/telemetry-system/utils.ts

Repository: iii-hq/sdk

Length of output: 840


🏁 Script executed:

# Check the index.ts to see how parseNumberEnv is used
cat -n packages/node/iii/src/telemetry-system/index.ts

Repository: iii-hq/sdk

Length of output: 11461


🏁 Script executed:

# Search for all usages of parseNumberEnv with context
rg -n "parseNumberEnv" -C5 --type=ts

Repository: iii-hq/sdk

Length of output: 3660


🏁 Script executed:

# Search for test files related to this utility
find . -type f \( -name "*test*" -o -name "*spec*" \) -path "*/telemetry-system/*" | head -20

Repository: iii-hq/sdk

Length of output: 36


🏁 Script executed:

# Search for test files in the entire packages/node/iii directory
find packages/node/iii -type f \( -name "*.test.ts" -o -name "*.spec.ts" \)

Repository: iii-hq/sdk

Length of output: 581


🏁 Script executed:

# Search for any tests referencing parseNumberEnv or parseIntegerEnv
rg -n "(parseNumberEnv|parseIntegerEnv)" --type=ts -g "*test*" -g "*spec*"

Repository: iii-hq/sdk

Length of output: 36


🏁 Script executed:

# Check DEFAULT_OTEL_CONFIG to understand the defaults
rg -n "DEFAULT_OTEL_CONFIG\|logsFlushIntervalMs\|logsBatchSize" --type=ts -B2 -A2

Repository: iii-hq/sdk

Length of output: 36


🏁 Script executed:

# Check if there are any guards before the parseNumberEnv calls
rg -n "OTEL_LOGS_FLUSH_INTERVAL_MS\|OTEL_LOGS_BATCH_SIZE" --type=ts -B3 -A3

Repository: iii-hq/sdk

Length of output: 36


🏁 Script executed:

# Look at the otel-defaults test file
cat -n packages/node/iii/tests/otel-defaults.test.ts

Repository: iii-hq/sdk

Length of output: 9587


🏁 Script executed:

# Search for DEFAULT_OTEL_CONFIG definition
rg -n "DEFAULT_OTEL_CONFIG" --type=ts

Repository: iii-hq/sdk

Length of output: 1386


🏁 Script executed:

# Search for logsFlushIntervalMs and logsBatchSize in types.ts
cat packages/node/iii/src/telemetry-system/types.ts

Repository: iii-hq/sdk

Length of output: 3960


🏁 Script executed:

# Test JavaScript Number coercion behavior to confirm the claim
node -e "console.log('Number(\"\") =', Number('')); console.log('Number(\"   \") =', Number('   ')); console.log('Number(\"0\") =', Number('0')); console.log('Number(\"abc\") =', Number('abc'))"

Repository: iii-hq/sdk

Length of output: 122


Add empty-string validation before numeric coercion in parseNumberEnv.

Environment variables like OTEL_LOGS_FLUSH_INTERVAL_MS="" pass Number("") which coerces to 0, silently overriding defaults (e.g., logsFlushIntervalMs: 100, logsBatchSize: 1). This causes incorrect configuration—empty env vars should be treated as unset, not as zero.

Currently called on lines 172 and 176 of index.ts with no upstream guards.

Proposed fix
 export function parseNumberEnv(value: string | undefined, minimum: number = 0): number | undefined {
   if (value === undefined) return undefined
-  const parsed = Number(value)
+  const normalized = value.trim()
+  if (normalized.length === 0) return undefined
+  const parsed = Number(normalized)
   if (!Number.isFinite(parsed) || parsed < minimum) return undefined
   return parsed
 }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
export function parseNumberEnv(value: string | undefined, minimum: number = 0): number | undefined {
if (value === undefined) return undefined
const parsed = Number(value)
if (!Number.isFinite(parsed) || parsed < minimum) return undefined
return parsed
export function parseNumberEnv(value: string | undefined, minimum: number = 0): number | undefined {
if (value === undefined) return undefined
const normalized = value.trim()
if (normalized.length === 0) return undefined
const parsed = Number(normalized)
if (!Number.isFinite(parsed) || parsed < minimum) return undefined
return parsed
}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@packages/node/iii/src/telemetry-system/utils.ts` around lines 4 - 8,
parseNumberEnv currently coerces an empty string ("") to 0 via Number("") which
can override defaults; update parseNumberEnv to treat empty-string inputs as
undefined before numeric coercion by returning undefined if value === "" (in
addition to value === undefined), then continue parsing with Number(value) and
validate against Number.isFinite(parsed) and parsed < minimum as before;
reference the function parseNumberEnv so reviewers can locate and update its
handling of empty-string env values.

}

/**
* Parse an integer environment variable with optional minimum bound.
*/
export function parseIntegerEnv(
value: string | undefined,
minimum: number = 0,
): number | undefined {
const parsed = parseNumberEnv(value, minimum)
if (parsed === undefined || !Number.isInteger(parsed)) return undefined
return parsed
}
85 changes: 65 additions & 20 deletions packages/python/iii/src/iii/telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import logging
import os
import uuid
from typing import Any
from typing import Any, cast

from .telemetry_types import OtelConfig

Expand Down Expand Up @@ -44,10 +44,11 @@ def init_otel(

cfg = config or OtelConfig()

enabled = cfg.enabled
if enabled is None:
env = os.environ.get("OTEL_ENABLED", "").lower()
if cfg.enabled is not None:
enabled = cfg.enabled
else:
# Enabled by default; set OTEL_ENABLED=false/0/no/off to disable
env = os.environ.get("OTEL_ENABLED", "").lower()
enabled = env not in ("false", "0", "no", "off")

if not enabled:
Expand Down Expand Up @@ -113,7 +114,7 @@ def init_otel(
# --- Log exporter ---
logs_enabled = cfg.logs_enabled if cfg.logs_enabled is not None else True
if logs_enabled:
_configure_log_provider(resource, _connection)
_configure_log_provider(resource, _connection, cfg)

_initialized = True

Expand Down Expand Up @@ -152,7 +153,32 @@ def _configure_meter_provider(
_meter = meter_provider.get_meter(service_name)


def _configure_log_provider(resource: Any, connection: Any) -> None:
def _resolve_int(
config_value: int | None,
env_var: str,
default: int,
minimum: int = 0,
) -> int:
"""Resolve an integer setting: explicit config > env var > default.

Matches the Node SDK's resolution order for cross-SDK consistency.
"""
if config_value is not None:
return config_value

raw = os.environ.get(env_var)
if raw is not None:
try:
val = int(raw)
if val >= minimum:
return val
except (ValueError, TypeError):
pass

return default


def _configure_log_provider(resource: Any, connection: Any, cfg: OtelConfig) -> None:
"""Set up a global SdkLoggerProvider with EngineLogExporter."""
global _log_provider
try:
Expand All @@ -168,11 +194,31 @@ def _configure_log_provider(resource: Any, connection: Any) -> None:
return

log_exporter = EngineLogExporter(connection)

logs_flush_interval_ms = _resolve_int(
cfg.logs_flush_interval_ms, "OTEL_LOGS_FLUSH_INTERVAL_MS", default=100,
)
logs_batch_size = _resolve_int(
cfg.logs_batch_size, "OTEL_LOGS_BATCH_SIZE", default=1, minimum=1,
)

log_provider = SdkLoggerProvider(resource=resource)
log_provider.add_log_record_processor(BatchLogRecordProcessor(log_exporter)) # type: ignore[arg-type]
log_provider.add_log_record_processor(
BatchLogRecordProcessor(
cast(Any, log_exporter),
schedule_delay_millis=logs_flush_interval_ms,
max_export_batch_size=logs_batch_size,
)
)
_logs.set_logger_provider(log_provider)
_log_provider = log_provider

logging.getLogger("iii.telemetry").debug(
"Log provider configured: flush_interval=%dms, batch_size=%d",
logs_flush_interval_ms,
logs_batch_size,
)


_original_opener_open: Any = None

Expand Down Expand Up @@ -297,6 +343,15 @@ async def shutdown_otel_async() -> None:
_reset_state()


def _shutdown_provider(provider: Any) -> None:
"""Call shutdown() on a provider, silently ignoring errors."""
try:
if provider is not None and hasattr(provider, "shutdown"):
provider.shutdown()
except Exception:
pass


def _reset_state() -> None:
global _tracer, _meter, _meter_provider, _log_provider, _connection, _initialized, _fetch_patched

Expand All @@ -312,21 +367,11 @@ def _reset_state() -> None:
if _initialized:
try:
from opentelemetry import trace
provider = trace.get_tracer_provider()
if hasattr(provider, "shutdown"):
provider.shutdown()
except Exception:
pass
try:
if _meter_provider and hasattr(_meter_provider, "shutdown"):
_meter_provider.shutdown()
except Exception:
pass
try:
if _log_provider and hasattr(_log_provider, "shutdown"):
_log_provider.shutdown()
_shutdown_provider(trace.get_tracer_provider())
except Exception:
pass
_shutdown_provider(_meter_provider)
_shutdown_provider(_log_provider)

_tracer = None
_meter = None
Expand Down
6 changes: 6 additions & 0 deletions packages/python/iii/src/iii/telemetry_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ class OtelConfig:
logs_enabled: bool | None = None
"""Enable OTel log export via EngineLogExporter. Defaults to True when OTel is enabled."""

logs_flush_interval_ms: int | None = None
"""Log processor flush delay in milliseconds. Defaults to 100ms when not set."""

logs_batch_size: int | None = None
"""Maximum number of log records exported per batch. Defaults to 1 when not set."""

metrics_enabled: bool = True
"""Enable OTel metrics export via EngineMetricsExporter. Defaults to True."""

Expand Down
93 changes: 75 additions & 18 deletions packages/python/iii/tests/test_telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,29 +9,25 @@
ORIGINAL_OPENER_OPEN = urllib.request.OpenerDirector.open


def _reset_otel_singleton(module_path: str, provider_attr: str, set_once_attr: str) -> None:
"""Reset one OTel global singleton (provider + its SetOnce guard)."""
try:
import importlib
mod = importlib.import_module(module_path)
setattr(mod, provider_attr, None)
getattr(mod, set_once_attr)._done = False
except Exception:
pass


@pytest.fixture(autouse=True)
def cleanup():
yield
shutdown_otel()
# Reset all OTel global singletons so tests don't bleed state
try:
import opentelemetry._logs._internal as _li
_li._LOGGER_PROVIDER = None
_li._LOGGER_PROVIDER_SET_ONCE._done = False
except Exception:
pass
try:
import opentelemetry.trace._internal as _ti
_ti._TRACER_PROVIDER = None
_ti._TRACER_PROVIDER_SET_ONCE._done = False
except Exception:
pass
try:
import opentelemetry.metrics._internal as _mi
_mi._METER_PROVIDER = None
_mi._METER_PROVIDER_SET_ONCE._done = False
except Exception:
pass
_reset_otel_singleton("opentelemetry._logs._internal", "_LOGGER_PROVIDER", "_LOGGER_PROVIDER_SET_ONCE")
_reset_otel_singleton("opentelemetry.trace._internal", "_TRACER_PROVIDER", "_TRACER_PROVIDER_SET_ONCE")
_reset_otel_singleton("opentelemetry.metrics._internal", "_METER_PROVIDER", "_METER_PROVIDER_SET_ONCE")
urllib.request.OpenerDirector.open = ORIGINAL_OPENER_OPEN


Expand Down Expand Up @@ -143,3 +139,64 @@ def test_shutdown_closes_connection():
init_otel(OtelConfig(enabled=True))
asyncio.run(shutdown_otel_async())
mock_shutdown.assert_called_once()


def test_init_configures_log_batch_params_defaults():
"""Default batch config: 100ms flush, batch size 1."""
from opentelemetry._logs import get_logger_provider
from opentelemetry.sdk._logs import LoggerProvider as SdkLoggerProvider
from opentelemetry.sdk._logs.export import BatchLogRecordProcessor

init_otel(OtelConfig(enabled=True))
lp = get_logger_provider()
assert isinstance(lp, SdkLoggerProvider)
# Verify processor was created with no errors (config was applied)
processors = lp._multi_log_record_processor._log_record_processors
blrp = next((p for p in processors if isinstance(p, BatchLogRecordProcessor)), None)
assert blrp is not None


def test_init_log_batch_config_explicit_overrides():
"""Explicit config values override env vars and defaults."""
from opentelemetry._logs import get_logger_provider
from opentelemetry.sdk._logs import LoggerProvider as SdkLoggerProvider

init_otel(OtelConfig(enabled=True, logs_flush_interval_ms=200, logs_batch_size=5))
lp = get_logger_provider()
assert isinstance(lp, SdkLoggerProvider)


def test_init_log_batch_config_env_var_override(monkeypatch):
"""Env vars override defaults when config fields are None."""
from opentelemetry._logs import get_logger_provider
from opentelemetry.sdk._logs import LoggerProvider as SdkLoggerProvider

monkeypatch.setenv("OTEL_LOGS_FLUSH_INTERVAL_MS", "500")
monkeypatch.setenv("OTEL_LOGS_BATCH_SIZE", "10")
init_otel(OtelConfig(enabled=True))
lp = get_logger_provider()
assert isinstance(lp, SdkLoggerProvider)


def test_init_log_batch_config_explicit_beats_env(monkeypatch):
"""Explicit config value takes precedence over env var."""
from opentelemetry._logs import get_logger_provider
from opentelemetry.sdk._logs import LoggerProvider as SdkLoggerProvider

monkeypatch.setenv("OTEL_LOGS_FLUSH_INTERVAL_MS", "999")
monkeypatch.setenv("OTEL_LOGS_BATCH_SIZE", "99")
init_otel(OtelConfig(enabled=True, logs_flush_interval_ms=200, logs_batch_size=5))
lp = get_logger_provider()
assert isinstance(lp, SdkLoggerProvider)


def test_init_log_batch_config_invalid_env_ignored(monkeypatch):
"""Invalid env var values are silently ignored, falling through to defaults."""
from opentelemetry._logs import get_logger_provider
from opentelemetry.sdk._logs import LoggerProvider as SdkLoggerProvider

monkeypatch.setenv("OTEL_LOGS_FLUSH_INTERVAL_MS", "not-a-number")
monkeypatch.setenv("OTEL_LOGS_BATCH_SIZE", "-5")
init_otel(OtelConfig(enabled=True))
lp = get_logger_provider()
assert isinstance(lp, SdkLoggerProvider)
12 changes: 12 additions & 0 deletions packages/python/iii/tests/test_telemetry_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,15 @@ def test_logs_enabled_defaults_to_none():
def test_engine_ws_url_exists():
cfg = OtelConfig(engine_ws_url="ws://custom:1234")
assert cfg.engine_ws_url == "ws://custom:1234"


def test_logs_batch_config_defaults_to_none():
cfg = OtelConfig()
assert cfg.logs_flush_interval_ms is None
assert cfg.logs_batch_size is None


def test_logs_batch_config_explicit():
cfg = OtelConfig(logs_flush_interval_ms=500, logs_batch_size=10)
assert cfg.logs_flush_interval_ms == 500
assert cfg.logs_batch_size == 10
2 changes: 1 addition & 1 deletion packages/python/iii/uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion packages/rust/iii/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading