Skip to content

Commit 6679478

Browse files
authored
Merge branch 'main' into feat/extensions-utils
2 parents b3e7296 + 56cd2ab commit 6679478

File tree

2 files changed

+84
-3
lines changed

2 files changed

+84
-3
lines changed

.github/workflows/server.yml

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -210,8 +210,9 @@ jobs:
210210
ARCH: ${{ matrix.arch }}
211211
TARGET: binary
212212
PLATFORM: ${{ matrix.platform }}
213-
# Use PR head SHA for pull requests to match the image tag expected by run-examples.yml
214-
GITHUB_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
213+
# Use SDK_SHA for PR head SHA - GITHUB_SHA is a built-in that gets overwritten by checkout
214+
# build.py checks SDK_SHA before GITHUB_SHA (see _git_info priority order)
215+
SDK_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
215216
GITHUB_REF: ${{ github.ref }}
216217
CI: 'true'
217218

@@ -254,7 +255,8 @@ jobs:
254255
echo "tags=$TAGS" >> $GITHUB_OUTPUT
255256
256257
# Extract short SHA for consolidation
257-
SHORT_SHA=$(echo ${{ github.sha }} | cut -c1-7)
258+
# Use SDK_SHA env var (set above to PR head SHA for PRs)
259+
SHORT_SHA=$(echo $SDK_SHA | cut -c1-7)
258260
echo "short_sha=$SHORT_SHA" >> $GITHUB_OUTPUT
259261
260262
# Extract versioned tags CSV for consolidation

tests/sdk/llm/test_llm.py

Lines changed: 79 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
from openai.types.responses.response_output_text import ResponseOutputText
1010
from pydantic import SecretStr
1111

12+
from openhands.sdk import ConversationStats, RegistryEvent
1213
from openhands.sdk.llm import LLM, LLMResponse, Message, TextContent
1314
from openhands.sdk.llm.exceptions import LLMNoResponseError
1415
from openhands.sdk.llm.options.responses_options import select_responses_options
@@ -1095,6 +1096,84 @@ def test_issue_2459_restore_metrics_syncs_telemetry():
10951096
assert llm.telemetry.metrics is llm.metrics
10961097

10971098

1099+
@pytest.fixture
1100+
def llm():
1101+
"""Create a minimal SDK LLM for testing."""
1102+
return LLM(
1103+
model="openai/gpt-4o",
1104+
api_key=SecretStr("test-key"),
1105+
usage_id="test-service",
1106+
)
1107+
1108+
1109+
def test_cost_recorded_in_restored_metrics(llm):
1110+
"""Costs added via telemetry after restore must land in the restored Metrics."""
1111+
restored = Metrics(model_name="openai/gpt-4o")
1112+
restored.add_cost(5.00)
1113+
llm.restore_metrics(restored)
1114+
1115+
llm.telemetry.metrics.add_cost(0.50)
1116+
1117+
assert llm.metrics.accumulated_cost == 5.50
1118+
assert len(llm.metrics.costs) == 2
1119+
1120+
1121+
def test_stale_metrics_not_updated(llm):
1122+
"""The original (pre-restore) Metrics must not receive new costs."""
1123+
original_metrics = llm.metrics
1124+
1125+
restored = Metrics(model_name="openai/gpt-4o")
1126+
restored.add_cost(2.00)
1127+
llm.restore_metrics(restored)
1128+
1129+
llm.telemetry.metrics.add_cost(0.75)
1130+
1131+
assert original_metrics.accumulated_cost == 0.0
1132+
assert llm.metrics.accumulated_cost == 2.75
1133+
1134+
1135+
def test_restore_metrics_telemetry_none():
1136+
"""restore_metrics() must not crash when telemetry has not been initialized."""
1137+
llm = LLM(
1138+
model="openai/gpt-4o",
1139+
api_key=SecretStr("test-key"),
1140+
usage_id="test-service",
1141+
)
1142+
llm._telemetry = None
1143+
1144+
restored = Metrics(model_name="openai/gpt-4o")
1145+
restored.add_cost(1.00)
1146+
llm.restore_metrics(restored)
1147+
1148+
assert llm.metrics is restored
1149+
assert llm.metrics.accumulated_cost == 1.00
1150+
1151+
1152+
def test_conversation_stats_restore_then_track():
1153+
"""End-to-end: ConversationStats restores metrics, then new costs are tracked."""
1154+
saved_metrics = Metrics(model_name="openai/gpt-4o")
1155+
saved_metrics.add_cost(10.00)
1156+
1157+
stats = ConversationStats(usage_to_metrics={"agent": saved_metrics})
1158+
1159+
with patch("openhands.sdk.llm.llm.litellm_completion"):
1160+
llm = LLM(
1161+
model="openai/gpt-4o",
1162+
api_key=SecretStr("test-key"),
1163+
usage_id="agent",
1164+
)
1165+
event = RegistryEvent(llm=llm)
1166+
stats.register_llm(event)
1167+
1168+
assert llm.metrics.accumulated_cost == 10.00
1169+
1170+
# Simulate a new LLM response adding cost via telemetry
1171+
llm.telemetry.metrics.add_cost(0.25)
1172+
1173+
assert llm.metrics.accumulated_cost == 10.25
1174+
assert stats.get_combined_metrics().accumulated_cost == 10.25
1175+
1176+
10981177
# max_output_tokens Capping Tests
10991178

11001179

0 commit comments

Comments (0)