Skip to content

Commit 416f2bd

Browse files
fix: always capture system prompt
1 parent b3e21c1 commit 416f2bd

File tree

4 files changed

+298
-18
lines changed

4 files changed

+298
-18
lines changed

posthog/ai/gemini/gemini.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
merge_usage_stats,
2020
)
2121
from posthog.ai.gemini.gemini_converter import (
22-
format_gemini_input,
2322
extract_gemini_usage_from_chunk,
2423
extract_gemini_content_from_chunk,
2524
format_gemini_streaming_output,
@@ -359,7 +358,7 @@ def _capture_streaming_event(
359358
from posthog.ai.types import StreamingEventData
360359

361360
# Prepare standardized event data
362-
formatted_input = self._format_input(contents)
361+
formatted_input = self._format_input(contents, **kwargs)
363362
sanitized_input = sanitize_gemini(formatted_input)
364363

365364
event_data = StreamingEventData(
@@ -381,10 +380,13 @@ def _capture_streaming_event(
381380
# Use the common capture function
382381
capture_streaming_event(self._ph_client, event_data)
383382

384-
def _format_input(self, contents):
383+
def _format_input(self, contents, **kwargs):
385384
"""Format input contents for PostHog tracking"""
385+
from posthog.ai.utils import merge_system_prompt
386386

387-
return format_gemini_input(contents)
387+
# Create kwargs dict with contents for merge_system_prompt
388+
input_kwargs = {"contents": contents, **kwargs}
389+
return merge_system_prompt(input_kwargs, "gemini")
388390

389391
def generate_content_stream(
390392
self,

posthog/ai/openai/openai_converter.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -606,7 +606,6 @@ def format_openai_streaming_input(
606606
Returns:
607607
Formatted input ready for PostHog tracking
608608
"""
609-
if api_type == "chat":
610-
return kwargs.get("messages")
611-
else: # responses API
612-
return kwargs.get("input")
609+
from posthog.ai.utils import merge_system_prompt
610+
611+
return merge_system_prompt(kwargs, "openai")

posthog/ai/utils.py

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
import time
22
import uuid
3-
from typing import Any, Callable, Dict, Optional
3+
from typing import Any, Callable, Dict, List, Optional, cast
44

55
from posthog.client import Client as PostHogClient
6-
from posthog.ai.types import StreamingEventData, TokenUsage
6+
from posthog.ai.types import FormattedMessage, StreamingEventData, TokenUsage
77
from posthog.ai.sanitization import (
88
sanitize_openai,
99
sanitize_anthropic,
@@ -158,7 +158,7 @@ def extract_available_tool_calls(provider: str, kwargs: Dict[str, Any]):
158158
return None
159159

160160

161-
def merge_system_prompt(kwargs: Dict[str, Any], provider: str):
161+
def merge_system_prompt(kwargs: Dict[str, Any], provider: str) -> List[FormattedMessage]:
162162
"""
163163
Merge system prompts and format messages for the given provider.
164164
"""
@@ -172,7 +172,17 @@ def merge_system_prompt(kwargs: Dict[str, Any], provider: str):
172172
from posthog.ai.gemini.gemini_converter import format_gemini_input
173173

174174
contents = kwargs.get("contents", [])
175-
return format_gemini_input(contents)
175+
formatted_messages = format_gemini_input(contents)
176+
177+
if kwargs.get("system_instruction") is not None:
178+
system_instruction = kwargs.get("system_instruction")
179+
system_message = cast(FormattedMessage, {
180+
"role": "system",
181+
"content": system_instruction
182+
})
183+
formatted_messages = [system_message] + list(formatted_messages)
184+
185+
return formatted_messages
176186
elif provider == "openai":
177187
from posthog.ai.openai.openai_converter import format_openai_input
178188

@@ -187,9 +197,10 @@ def merge_system_prompt(kwargs: Dict[str, Any], provider: str):
187197
if kwargs.get("system") is not None:
188198
has_system = any(msg.get("role") == "system" for msg in messages)
189199
if not has_system:
190-
messages = [
191-
{"role": "system", "content": kwargs.get("system")}
192-
] + messages
200+
system_msg = cast(FormattedMessage, {
201+
"role": "system", "content": kwargs.get("system")
202+
})
203+
messages = [system_msg] + messages
193204

194205
# For Responses API, add instructions to the system prompt if provided
195206
if kwargs.get("instructions") is not None:
@@ -207,9 +218,10 @@ def merge_system_prompt(kwargs: Dict[str, Any], provider: str):
207218
)
208219
else:
209220
# Create a new system message with instructions
210-
messages = [
211-
{"role": "system", "content": kwargs.get("instructions")}
212-
] + messages
221+
instruction_msg = cast(FormattedMessage, {
222+
"role": "system", "content": kwargs.get("instructions")
223+
})
224+
messages = [instruction_msg] + messages
213225

214226
return messages
215227

Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
"""
2+
Tests for system prompt capture across all LLM providers.
3+
4+
This test suite ensures that system prompts are correctly captured in analytics
5+
regardless of how they're passed to the providers:
6+
- As first message in messages/contents array (standard format)
7+
- As separate system parameter (Anthropic, OpenAI)
8+
- As instructions parameter (OpenAI Responses API)
9+
- As system_instruction parameter (Gemini)
10+
"""
11+
12+
import time
13+
import unittest
14+
from unittest.mock import patch, MagicMock
15+
16+
17+
class TestSystemPromptCapture(unittest.TestCase):
    """Test system prompt capture for all providers.

    Each test drives a PostHog-wrapped provider client against a patched
    provider SDK call and then checks that the single captured event carries
    the system prompt as the first entry of ``$ai_input``, followed by the
    user message. Provider packages are imported lazily inside the tests so
    that a missing SDK skips the test instead of failing it.
    """

    def setUp(self):
        """Create the shared prompt fixtures and a mock PostHog client."""
        super().setUp()
        self.test_system_prompt = "You are a helpful AI assistant."
        self.test_user_message = "Hello, how are you?"
        self.test_response = "I'm doing well, thank you!"

        # Create mock PostHog client
        self.client = MagicMock()
        self.client.privacy_mode = False

    # Shared helpers
    def _assert_system_prompt_captured(self, captured_input):
        """Helper to assert system prompt is correctly captured."""
        self.assertEqual(len(captured_input), 2, "Should have 2 messages (system + user)")
        self.assertEqual(captured_input[0]["role"], "system", "First message should be system")
        self.assertEqual(
            captured_input[0]["content"], self.test_system_prompt, "System content should match"
        )
        self.assertEqual(captured_input[1]["role"], "user", "Second message should be user")
        self.assertEqual(
            captured_input[1]["content"], self.test_user_message, "User content should match"
        )

    def _captured_ai_input(self):
        """Return ``$ai_input`` from the one event sent to the mock PostHog client."""
        self.assertEqual(len(self.client.capture.call_args_list), 1)
        # Each recorded call is an (args, kwargs) pair; properties is read from kwargs.
        properties = self.client.capture.call_args_list[0][1]["properties"]
        return properties["$ai_input"]

    def _messages(self, *, include_system):
        """Build the request messages, optionally led by a system message."""
        messages = [{"role": "user", "content": self.test_user_message}]
        if include_system:
            messages.insert(0, {"role": "system", "content": self.test_system_prompt})
        return messages

    # OpenAI helpers
    def _import_openai_client(self):
        """Return the PostHog OpenAI wrapper class, skipping the test if it cannot be imported."""
        try:
            from posthog.ai.openai import OpenAI
        except ImportError:
            self.skipTest("OpenAI package not available")
        return OpenAI

    def _openai_chat_completion(self):
        """Build the canned non-streaming ChatCompletion used as the patched return value."""
        try:
            from openai.types.chat import ChatCompletion, ChatCompletionMessage
            from openai.types.chat.chat_completion import Choice
            from openai.types.completion_usage import CompletionUsage
        except ImportError:
            self.skipTest("OpenAI package not available")

        return ChatCompletion(
            id="test",
            model="gpt-4",
            object="chat.completion",
            created=int(time.time()),
            choices=[
                Choice(
                    finish_reason="stop",
                    index=0,
                    message=ChatCompletionMessage(content=self.test_response, role="assistant"),
                )
            ],
            usage=CompletionUsage(completion_tokens=10, prompt_tokens=20, total_tokens=30),
        )

    def _openai_stream_chunks(self):
        """Build the two canned streaming chunks; only the final chunk carries usage."""
        try:
            from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
            from openai.types.chat.chat_completion_chunk import Choice as ChoiceChunk
            from openai.types.chat.chat_completion_chunk import ChoiceDelta
            from openai.types.completion_usage import CompletionUsage
        except ImportError:
            self.skipTest("OpenAI package not available")

        first_chunk = ChatCompletionChunk(
            id="test",
            model="gpt-4",
            object="chat.completion.chunk",
            created=int(time.time()),
            choices=[
                ChoiceChunk(
                    finish_reason=None,
                    index=0,
                    delta=ChoiceDelta(content="Hello", role="assistant"),
                )
            ],
        )
        final_chunk = ChatCompletionChunk(
            id="test",
            model="gpt-4",
            object="chat.completion.chunk",
            created=int(time.time()),
            choices=[
                ChoiceChunk(
                    finish_reason="stop",
                    index=0,
                    delta=ChoiceDelta(content=" there!", role=None),
                )
            ],
            usage=CompletionUsage(completion_tokens=10, prompt_tokens=20, total_tokens=30),
        )
        return [first_chunk, final_chunk]

    # Anthropic helpers
    def _import_anthropic_client(self):
        """Return the PostHog Anthropic wrapper class, skipping the test if it cannot be imported."""
        try:
            from posthog.ai.anthropic import Anthropic
        except ImportError:
            self.skipTest("Anthropic package not available")
        return Anthropic

    def _anthropic_response(self):
        """Build a mock Anthropic response with explicit token usage and no cache counters."""
        response = MagicMock()
        response.usage.input_tokens = 20
        response.usage.output_tokens = 10
        response.usage.cache_read_input_tokens = None
        response.usage.cache_creation_input_tokens = None
        return response

    # Gemini helpers
    def _import_gemini_client(self):
        """Return the PostHog Gemini wrapper class, skipping the test if it cannot be imported."""
        try:
            from posthog.ai.gemini import Client
        except ImportError:
            self.skipTest("Gemini package not available")
        return Client

    def _install_gemini_mock(self, mock_genai_class):
        """Make the patched ``google.genai.Client`` class hand out a client with a canned reply."""
        mock_response = MagicMock()
        mock_response.candidates = [MagicMock()]
        mock_response.candidates[0].content.parts = [MagicMock()]
        mock_response.candidates[0].content.parts[0].text = self.test_response
        mock_response.usage_metadata.prompt_token_count = 20
        mock_response.usage_metadata.candidates_token_count = 10
        mock_response.usage_metadata.cached_content_token_count = None
        mock_response.usage_metadata.thoughts_token_count = None

        mock_client_instance = MagicMock()
        mock_models_instance = MagicMock()
        mock_models_instance.generate_content.return_value = mock_response
        mock_client_instance.models = mock_models_instance
        mock_genai_class.return_value = mock_client_instance

    # OpenAI Tests
    def test_openai_messages_array_system_prompt(self):
        """Test OpenAI with system prompt in messages array."""
        OpenAI = self._import_openai_client()
        mock_response = self._openai_chat_completion()

        with patch(
            "openai.resources.chat.completions.Completions.create", return_value=mock_response
        ):
            client = OpenAI(posthog_client=self.client, api_key="test")

            client.chat.completions.create(
                model="gpt-4",
                messages=self._messages(include_system=True),
                posthog_distinct_id="test-user",
            )

            self._assert_system_prompt_captured(self._captured_ai_input())

    def test_openai_separate_system_parameter(self):
        """Test OpenAI with system prompt as separate parameter."""
        OpenAI = self._import_openai_client()
        mock_response = self._openai_chat_completion()

        with patch(
            "openai.resources.chat.completions.Completions.create", return_value=mock_response
        ):
            client = OpenAI(posthog_client=self.client, api_key="test")

            client.chat.completions.create(
                model="gpt-4",
                messages=self._messages(include_system=False),
                system=self.test_system_prompt,
                posthog_distinct_id="test-user",
            )

            self._assert_system_prompt_captured(self._captured_ai_input())

    def test_openai_streaming_system_parameter(self):
        """Test OpenAI streaming with system parameter."""
        OpenAI = self._import_openai_client()
        chunks = self._openai_stream_chunks()

        with patch("openai.resources.chat.completions.Completions.create", return_value=chunks):
            client = OpenAI(posthog_client=self.client, api_key="test")

            response_generator = client.chat.completions.create(
                model="gpt-4",
                messages=self._messages(include_system=False),
                system=self.test_system_prompt,
                stream=True,
                posthog_distinct_id="test-user",
            )

            list(response_generator)  # Consume generator

            self._assert_system_prompt_captured(self._captured_ai_input())

    # Anthropic Tests
    def test_anthropic_messages_array_system_prompt(self):
        """Test Anthropic with system prompt in messages array."""
        Anthropic = self._import_anthropic_client()

        with patch("anthropic.resources.messages.Messages.create") as mock_create:
            mock_create.return_value = self._anthropic_response()

            client = Anthropic(posthog_client=self.client, api_key="test")

            client.messages.create(
                model="claude-3-5-sonnet-20241022",
                messages=self._messages(include_system=True),
                posthog_distinct_id="test-user",
            )

            self._assert_system_prompt_captured(self._captured_ai_input())

    def test_anthropic_separate_system_parameter(self):
        """Test Anthropic with system prompt as separate parameter."""
        Anthropic = self._import_anthropic_client()

        with patch("anthropic.resources.messages.Messages.create") as mock_create:
            mock_create.return_value = self._anthropic_response()

            client = Anthropic(posthog_client=self.client, api_key="test")

            client.messages.create(
                model="claude-3-5-sonnet-20241022",
                messages=self._messages(include_system=False),
                system=self.test_system_prompt,
                posthog_distinct_id="test-user",
            )

            self._assert_system_prompt_captured(self._captured_ai_input())

    # Gemini Tests
    def test_gemini_contents_array_system_prompt(self):
        """Test Gemini with system prompt in contents array."""
        Client = self._import_gemini_client()

        with patch("google.genai.Client") as mock_genai_class:
            self._install_gemini_mock(mock_genai_class)

            client = Client(posthog_client=self.client, api_key="test")

            client.models.generate_content(
                model="gemini-2.0-flash",
                contents=self._messages(include_system=True),
                posthog_distinct_id="test-user",
            )

            self._assert_system_prompt_captured(self._captured_ai_input())

    def test_gemini_system_instruction_parameter(self):
        """Test Gemini with system_instruction parameter."""
        Client = self._import_gemini_client()

        with patch("google.genai.Client") as mock_genai_class:
            self._install_gemini_mock(mock_genai_class)

            client = Client(posthog_client=self.client, api_key="test")

            client.models.generate_content(
                model="gemini-2.0-flash",
                contents=self._messages(include_system=False),
                system_instruction=self.test_system_prompt,
                posthog_distinct_id="test-user",
            )

            self._assert_system_prompt_captured(self._captured_ai_input())

0 commit comments

Comments
 (0)