diff --git a/BEFORE_SEND.md b/BEFORE_SEND.md new file mode 100644 index 00000000..a3696d3d --- /dev/null +++ b/BEFORE_SEND.md @@ -0,0 +1,237 @@ +# Before Send Hook + +The `before_send` parameter allows you to modify or filter events before they are sent to PostHog. This is useful for: + +- **Privacy**: Removing or masking sensitive data (PII) +- **Filtering**: Dropping unwanted events (test events, internal users, etc.) +- **Enhancement**: Adding custom properties to all events +- **Transformation**: Modifying event names or property formats + +## Basic Usage + +```python +import posthog +from typing import Optional, Dict, Any + +def my_before_send(event: Dict[str, Any]) -> Optional[Dict[str, Any]]: + """ + Process event before sending to PostHog. + + Args: + event: The event dictionary containing 'event', 'distinct_id', 'properties', etc. + + Returns: + Modified event dictionary to send, or None to drop the event + """ + # Your processing logic here + return event + +# Initialize client with before_send hook +client = posthog.Client( + api_key="your-project-api-key", + before_send=my_before_send +) +``` + +## Common Use Cases + +### 1. Filter Out Events + +```python +from typing import Optional, Any + +def filter_events_by_property_or_event_name(event: dict[str, Any]) -> Optional[dict[str, Any]]: + """Drop events from internal users or test environments.""" + properties = event.get("properties", {}) + + # Choose some property from your events + event_source = properties.get("event_source", "") + if event_source.endswith("internal"): + return None # Drop the event + + # Filter out test events + if event.get("event") == "test_event": + return None + + return event +``` + +### 2. 
Remove/Mask PII Data + +```python +from typing import Optional, Any + +def scrub_pii(event: dict[str, Any]) -> Optional[dict[str, Any]]: + """Remove or mask personally identifiable information.""" + properties = event.get("properties", {}) + + # Mask email but keep domain for analytics + if "email" in properties: + email = properties["email"] + if "@" in email: + domain = email.split("@")[1] + properties["email"] = f"***@{domain}" + else: + properties["email"] = "***" + + # Remove sensitive fields entirely + sensitive_fields = ["my_business_info", "secret_things"] + for field in sensitive_fields: + properties.pop(field, None) + + return event +``` + +### 3. Add Custom Properties + +```python +from typing import Optional, Any + +from datetime import datetime +from typing import Optional, Any + +def add_context(event: dict[str, Any]) -> Optional[dict[str, Any]]: + """Add custom properties to all events.""" + if "properties" not in event: + event["properties"] = {} + + event["properties"].update({ + "app_version": "2.1.0", + "environment": "production", + "processed_at": datetime.now().isoformat() + }) + + return event +``` + +### 4. Transform Event Names + +```python +from typing import Optional, Any + +def normalize_event_names(event: dict[str, Any]) -> Optional[dict[str, Any]]: + """Convert event names to a consistent format.""" + original_event = event.get("event") + if original_event: + # Convert to snake_case + normalized = original_event.lower().replace(" ", "_").replace("-", "_") + event["event"] = f"app_{normalized}" + + return event +``` + +### 5. Log and drop in "dev" mode + +When running in local dev, you often want to log every event but drop it instead of sending it. + + +```python +from typing import Optional, Any + +def log_and_drop_all(event: dict[str, Any]) -> Optional[dict[str, Any]]: + """Log every event, then drop it so nothing is sent.""" + print(event) + + return None +``` + +### 6. 
Combined Processing + +```python +from typing import Optional, Any + +def comprehensive_processor(event: dict[str, Any]) -> Optional[dict[str, Any]]: + """Apply multiple transformations in sequence.""" + + # Step 1: Filter unwanted events + if should_drop_event(event): + return None + + # Step 2: Scrub PII + event = scrub_pii(event) + + # Step 3: Add context + event = add_context(event) + + # Step 4: Normalize names + event = normalize_event_names(event) + + return event + +def should_drop_event(event: dict[str, Any]) -> bool: + """Determine if event should be dropped.""" + # Your filtering logic + return False +``` + +## Error Handling + +If your `before_send` function raises an exception, PostHog will: + +1. Log the error +2. Continue with the event that was passed to your function (in-place changes made before the exception are kept) +3. Not crash your application + +```python +from typing import Optional, Any + +def risky_before_send(event: dict[str, Any]) -> Optional[dict[str, Any]]: + # If this raises an exception, the original event will be sent + risky_operation() + return event +``` + +## Complete Example + +```python +import posthog +from typing import Optional, Any +import re + +def production_before_send(event: dict[str, Any]) -> Optional[dict[str, Any]]: + try: + properties = event.get("properties", {}) + + # 1. Filter out bot traffic + user_agent = properties.get("$user_agent", "") + if re.search(r'bot|crawler|spider', user_agent, re.I): + return None + + # 2. Filter out internal traffic + ip = properties.get("$ip", "") + if ip.startswith("192.168.") or ip.startswith("10."): + return None + + # 3. Scrub email PII but keep domain + if "email" in properties: + email = properties["email"] + if "@" in email: + domain = email.split("@")[1] + properties["email"] = f"***@{domain}" + + # 4. Add custom context + properties.update({ + "app_version": "1.0.0", + "build_number": "123" + }) + + # 5. 
Normalize event name + if event.get("event"): + event["event"] = event["event"].lower().replace(" ", "_") + + return event + + except Exception as e: + # Log error but don't crash + print(f"Error in before_send: {e}") + return event # Return original event on error + +# Usage +client = posthog.Client( + api_key="your-api-key", + before_send=production_before_send +) + +# All events will now be processed by your before_send function +client.capture("user_123", "Page View", {"url": "/home"}) +``` \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index ed44c797..31fd5ffc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 4.5.0- 2025-06-09 + +- feat: add before_send callback (#249) + ## 4.4.2- 2025-06-09 - empty point release to fix release automation diff --git a/posthog/client.py b/posthog/client.py index 3bd68ad9..a6ce4423 100644 --- a/posthog/client.py +++ b/posthog/client.py @@ -144,6 +144,7 @@ def __init__( exception_autocapture_integrations=None, project_root=None, privacy_mode=False, + before_send=None, ): self.queue = queue.Queue(max_queue_size) @@ -199,6 +200,15 @@ def __init__( else: self.log.setLevel(logging.WARNING) + if before_send is not None: + if callable(before_send): + self.before_send = before_send + else: + self.log.warning("before_send is not callable, it will be ignored") + self.before_send = None + else: + self.before_send = None + if self.enable_exception_autocapture: self.exception_capture = ExceptionCapture( self, integrations=self.exception_autocapture_integrations @@ -744,6 +754,18 @@ def _enqueue(self, msg, disable_geoip): msg["distinct_id"] = stringify_id(msg.get("distinct_id", None)) msg = clean(msg) + + if self.before_send: + try: + modified_msg = self.before_send(msg) + if modified_msg is None: + self.log.debug("Event dropped by before_send callback") + return True, None + msg = modified_msg + except Exception as e: + self.log.exception(f"Error in before_send callback: {e}") + # Continue with the 
original message if callback fails + self.log.debug("queueing: %s", msg) # if send is False, return msg as if it was successfully queued diff --git a/posthog/test/test_before_send.py b/posthog/test/test_before_send.py new file mode 100644 index 00000000..e081e0d1 --- /dev/null +++ b/posthog/test/test_before_send.py @@ -0,0 +1,171 @@ +import unittest + +import mock + +from posthog.client import Client +from posthog.test.test_utils import FAKE_TEST_API_KEY + + +class TestClient(unittest.TestCase): + @classmethod + def setUpClass(cls): + # This ensures no real HTTP POST requests are made + cls.client_post_patcher = mock.patch("posthog.client.batch_post") + cls.consumer_post_patcher = mock.patch("posthog.consumer.batch_post") + cls.client_post_patcher.start() + cls.consumer_post_patcher.start() + + @classmethod + def tearDownClass(cls): + cls.client_post_patcher.stop() + cls.consumer_post_patcher.stop() + + def set_fail(self, e, batch): + """Mark the failure handler""" + print("FAIL", e, batch) # noqa: T201 + self.failed = True + + def setUp(self): + self.failed = False + self.client = Client(FAKE_TEST_API_KEY, on_error=self.set_fail) + + def test_before_send_callback_modifies_event(self): + """Test that before_send callback can modify events.""" + processed_events = [] + + def my_before_send(event): + processed_events.append(event.copy()) + if "properties" not in event: + event["properties"] = {} + event["properties"]["processed_by_before_send"] = True + return event + + client = Client( + FAKE_TEST_API_KEY, on_error=self.set_fail, before_send=my_before_send + ) + success, msg = client.capture("user1", "test_event", {"original": "value"}) + + self.assertTrue(success) + self.assertEqual(msg["properties"]["processed_by_before_send"], True) + self.assertEqual(msg["properties"]["original"], "value") + self.assertEqual(len(processed_events), 1) + self.assertEqual(processed_events[0]["event"], "test_event") + + def test_before_send_callback_drops_event(self): + """Test that 
before_send callback can drop events by returning None.""" + + def drop_test_events(event): + if event.get("event") == "test_drop_me": + return None + return event + + client = Client( + FAKE_TEST_API_KEY, on_error=self.set_fail, before_send=drop_test_events + ) + + # Event should be dropped + success, msg = client.capture("user1", "test_drop_me") + self.assertTrue(success) + self.assertIsNone(msg) + + # Event should go through + success, msg = client.capture("user1", "keep_me") + self.assertTrue(success) + self.assertIsNotNone(msg) + self.assertEqual(msg["event"], "keep_me") + + def test_before_send_callback_handles_exceptions(self): + """Test that exceptions in before_send don't crash the client.""" + + def buggy_before_send(event): + raise ValueError("Oops!") + + client = Client( + FAKE_TEST_API_KEY, on_error=self.set_fail, before_send=buggy_before_send + ) + success, msg = client.capture("user1", "robust_event") + + # Event should still be sent despite the exception + self.assertTrue(success) + self.assertIsNotNone(msg) + self.assertEqual(msg["event"], "robust_event") + + def test_before_send_callback_works_with_all_event_types(self): + """Test that before_send works with capture, identify, set, etc.""" + + def add_marker(event): + if "properties" not in event: + event["properties"] = {} + event["properties"]["marked"] = True + return event + + client = Client( + FAKE_TEST_API_KEY, on_error=self.set_fail, before_send=add_marker + ) + + # Test capture + success, msg = client.capture("user1", "event") + self.assertTrue(success) + self.assertTrue(msg["properties"]["marked"]) + + # Test identify + success, msg = client.identify("user1", {"trait": "value"}) + self.assertTrue(success) + self.assertTrue(msg["properties"]["marked"]) + + # Test set + success, msg = client.set("user1", {"prop": "value"}) + self.assertTrue(success) + self.assertTrue(msg["properties"]["marked"]) + + # Test page + success, msg = client.page("user1", "https://example.com") + 
self.assertTrue(success) + self.assertTrue(msg["properties"]["marked"]) + + def test_before_send_callback_disabled_when_none(self): + """Test that client works normally when before_send is None.""" + client = Client(FAKE_TEST_API_KEY, on_error=self.set_fail, before_send=None) + success, msg = client.capture("user1", "normal_event") + + self.assertTrue(success) + self.assertIsNotNone(msg) + self.assertEqual(msg["event"], "normal_event") + + def test_before_send_callback_pii_scrubbing_example(self): + """Test a realistic PII scrubbing use case.""" + + def scrub_pii(event): + properties = event.get("properties", {}) + + # Mask email but keep domain + if "email" in properties: + email = properties["email"] + if "@" in email: + domain = email.split("@")[1] + properties["email"] = f"***@{domain}" + else: + properties["email"] = "***" + + # Remove credit card + properties.pop("credit_card", None) + + return event + + client = Client( + FAKE_TEST_API_KEY, on_error=self.set_fail, before_send=scrub_pii + ) + success, msg = client.capture( + "user1", + "form_submit", + { + "email": "user@example.com", + "credit_card": "1234-5678-9012-3456", + "form_name": "contact", + }, + ) + + self.assertTrue(success) + self.assertEqual(msg["properties"]["email"], "***@example.com") + self.assertNotIn("credit_card", msg["properties"]) + self.assertEqual(msg["properties"]["form_name"], "contact") diff --git a/posthog/types.py b/posthog/types.py index 4c3796ea..97de99a1 100644 --- a/posthog/types.py +++ b/posthog/types.py @@ -1,9 +1,13 @@ import json from dataclasses import dataclass -from typing import Any, List, Optional, TypedDict, Union, cast +from typing import Any, Callable, List, Optional, TypedDict, Union, cast FlagValue = Union[bool, str] +# Type alias for the before_send callback function +# Takes an event dictionary and returns the modified event or None to drop it +BeforeSendCallback = Callable[[dict[str, Any]], Optional[dict[str, Any]]] + @dataclass(frozen=True) class FlagReason: 
diff --git a/posthog/version.py b/posthog/version.py index d262c5ba..b996a2ab 100644 --- a/posthog/version.py +++ b/posthog/version.py @@ -1,4 +1,4 @@ -VERSION = "4.4.2" +VERSION = "4.5.0" if __name__ == "__main__": print(VERSION, end="") # noqa: T201