PostHog · pauldambra · Jun 5, 2025 · Jun 5, 2025 · Jun 5, 2025 · Jun 5, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,9 @@
+## 4.2.1 - 2025-6-05
+
+1. fix: feature flag request use geoip_disable (#235)
+2. chore: pin actions versions (#210)
+3. fix: opinionated setup and clean fn fix (#240)
+
 ## 4.2.0 - 2025-05-22
 
 Add support for google gemini

diff --git a/README.md b/README.md
@@ -17,6 +17,17 @@ Please see the [Python integration docs](https://posthog.com/docs/integrations/p
 4. Run `make test`
   1. To run a specific test do `pytest -k test_no_api_key`
 
+## PostHog recommends `uv` so...
+
+```bash
+uv python install 3.9.19
+uv python pin 3.9.19
+uv venv env
+source env/bin/activate
+uv pip install --editable ".[dev,test]"
+make test
+```
+
 ### Running Locally
 
 Assuming you have a [local version of PostHog](https://posthog.com/docs/developing-locally) running, you can run `python3 example.py` to see the library in action.

diff --git a/mypy-baseline.txt b/mypy-baseline.txt
@@ -1,7 +1,6 @@
 posthog/utils.py:0: error: Library stubs not installed for "six"  [import-untyped]
 posthog/utils.py:0: error: Library stubs not installed for "dateutil.tz"  [import-untyped]
 posthog/utils.py:0: error: Statement is unreachable  [unreachable]
-posthog/utils.py:0: error: Argument 1 to "join" of "str" has incompatible type "AttributeError"; expected "Iterable[str]"  [arg-type]
 posthog/request.py:0: error: Library stubs not installed for "requests"  [import-untyped]
 posthog/request.py:0: note: Hint: "python3 -m pip install types-requests"
 posthog/request.py:0: error: Library stubs not installed for "dateutil.tz"  [import-untyped]
@@ -38,4 +37,4 @@ posthog/ai/utils.py:0: note: Perhaps you meant "typing.Any" instead of "any"?
 posthog/ai/utils.py:0: error: Function "builtins.any" is not valid as a type  [valid-type]
 posthog/ai/utils.py:0: note: Perhaps you meant "typing.Any" instead of "any"?
 sentry_django_example/sentry_django_example/settings.py:0: error: Need type annotation for "ALLOWED_HOSTS" (hint: "ALLOWED_HOSTS: list[<type>] = ...")  [var-annotated]
-sentry_django_example/sentry_django_example/settings.py:0: error: Incompatible types in assignment (expression has type "str", variable has type "None")  [assignment]
+sentry_django_example/sentry_django_example/settings.py:0: error: Incompatible types in assignment (expression has type "str", variable has type "None")  [assignment]
diff --git a/posthog/test/test_size_limited_dict.py b/posthog/test/test_size_limited_dict.py
@@ -0,0 +1,33 @@
+import unittest
+from dataclasses import dataclass
+from datetime import date, datetime, timedelta
+from decimal import Decimal
+from typing import Optional
+from uuid import UUID
+
+import six
+from dateutil.tz import tzutc
+from parameterized import parameterized
+from pydantic import BaseModel
+from pydantic.v1 import BaseModel as BaseModelV1
+
+from posthog import utils
+
+
+class TestSizeLimitedDict(unittest.TestCase):
+    @parameterized.expand([(10, 100), (5, 20), (20, 200)])
+    def test_size_limited_dict(self, size: int, iterations: int) -> None:
+        values = utils.SizeLimitedDict(size, lambda _: -1)
+
+        for i in range(iterations):
+            values[i] = i
+
+            assert values[i] == i
+            assert len(values) == i % size + 1
+
+            if i % size == 0:
+                # old numbers should've been removed
+                self.assertIsNone(values.get(i - 1))
+                self.assertIsNone(values.get(i - 3))
+                self.assertIsNone(values.get(i - 5))
+                self.assertIsNone(values.get(i - 9))
diff --git a/posthog/test/test_utils.py b/posthog/test/test_utils.py
@@ -7,6 +7,7 @@
 
 import six
 from dateutil.tz import tzutc
+from parameterized import parameterized
 from pydantic import BaseModel
 from pydantic.v1 import BaseModel as BaseModelV1
 
@@ -17,17 +18,29 @@
 
 
 class TestUtils(unittest.TestCase):
+    @parameterized.expand(
+        [
+            ("naive datetime should be naive", True),
+            ("timezone-aware datetime should not be naive", False),
+        ]
+    )
+    def test_is_naive(self, _name: str, expected_naive: bool):
+        if expected_naive:
+            dt = datetime.now()  # naive datetime
+        else:
+            dt = datetime.now(tz=tzutc())  # timezone-aware datetime
+
+        assert utils.is_naive(dt) is expected_naive
+
     def test_timezone_utils(self):
         now = datetime.now()
         utcnow = datetime.now(tz=tzutc())
-        self.assertTrue(utils.is_naive(now))
-        self.assertFalse(utils.is_naive(utcnow))
 
         fixed = utils.guess_timezone(now)
-        self.assertFalse(utils.is_naive(fixed))
+        assert utils.is_naive(fixed) is False
 
         shouldnt_be_edited = utils.guess_timezone(utcnow)
-        self.assertEqual(utcnow, shouldnt_be_edited)
+        assert utcnow == shouldnt_be_edited
 
     def test_clean(self):
         simple = {
@@ -54,39 +67,37 @@ def test_clean(self):
         pre_clean_keys = combined.keys()
 
         utils.clean(combined)
-        self.assertEqual(combined.keys(), pre_clean_keys)
+        assert combined.keys() == pre_clean_keys
 
         # test UUID separately, as the UUID object doesn't equal its string representation according to Python
-        self.assertEqual(
-            utils.clean(UUID("12345678123456781234567812345678")),
-            "12345678-1234-5678-1234-567812345678",
-        )
+        assert utils.clean(UUID("12345678123456781234567812345678")) == "12345678-1234-5678-1234-567812345678"
 
     def test_clean_with_dates(self):
         dict_with_dates = {
             "birthdate": date(1980, 1, 1),
             "registration": datetime.now(tz=tzutc()),
         }
-        self.assertEqual(dict_with_dates, utils.clean(dict_with_dates))
+        assert dict_with_dates == utils.clean(dict_with_dates)
 
     def test_bytes(self):
-        if six.PY3:
-            item = bytes(10)
-        else:
-            item = bytearray(10)
-
+        item = bytes(10)
         utils.clean(item)
+        assert utils.clean(item) == "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 
     def test_clean_fn(self):
         cleaned = utils.clean({"fn": lambda x: x, "number": 4})
-        self.assertEqual(cleaned["number"], 4)
-        # TODO: fixme, different behavior on python 2 and 3
-        if "fn" in cleaned:
-            self.assertEqual(cleaned["fn"], None)
-
-    def test_remove_slash(self):
-        self.assertEqual("http://posthog.io", utils.remove_trailing_slash("http://posthog.io/"))
-        self.assertEqual("http://posthog.io", utils.remove_trailing_slash("http://posthog.io"))
+        assert cleaned == {"fn": None, "number": 4}
+
+    @parameterized.expand(
+        [
+            ("http://posthog.io/", "http://posthog.io"),
+            ("http://posthog.io", "http://posthog.io"),
+            ("https://example.com/path/", "https://example.com/path"),
+            ("https://example.com/path", "https://example.com/path"),
+        ]
+    )
+    def test_remove_slash(self, input_url, expected_url):
+        assert expected_url == utils.remove_trailing_slash(input_url)
 
     def test_clean_pydantic(self):
         class ModelV2(BaseModel):
@@ -101,19 +112,26 @@ class ModelV1(BaseModelV1):
         class NestedModel(BaseModel):
             foo: ModelV2
 
-        self.assertEqual(utils.clean(ModelV2(foo="1", bar=2)), {"foo": "1", "bar": 2, "baz": None})
-        self.assertEqual(utils.clean(ModelV1(foo=1, bar="2")), {"foo": 1, "bar": "2"})
-        self.assertEqual(
-            utils.clean(NestedModel(foo=ModelV2(foo="1", bar=2, baz="3"))),
-            {"foo": {"foo": "1", "bar": 2, "baz": "3"}},
-        )
+        assert utils.clean(ModelV2(foo="1", bar=2)) == {
+            "foo": "1",
+            "bar": 2,
+            "baz": None,
+        }
+        assert utils.clean(ModelV1(foo=1, bar="2")) == {"foo": 1, "bar": "2"}
+        assert utils.clean(NestedModel(foo=ModelV2(foo="1", bar=2, baz="3"))) == {
+            "foo": {"foo": "1", "bar": 2, "baz": "3"}
+        }
 
+    def test_clean_pydantic_like_class(self) -> None:
         class Dummy:
-            def model_dump(self, required_param):
-                pass
+            def model_dump(self, required_param: str) -> dict:
+                return {}
 
-        # Skips a class with a defined non-Pydantic `model_dump` method.
-        self.assertEqual(utils.clean({"test": Dummy()}), {})
+        # previously python 2 code would cause an error while cleaning,
+        # and this entire object would be None, and we would log an error
+        # let's allow ourselves to clean `Dummy` as None,
+        # without blatting the `test` key
+        assert utils.clean({"test": Dummy()}) == {"test": None}
 
     def test_clean_dataclass(self):
         @dataclass
@@ -130,47 +148,25 @@ class TestDataClass:
             bar: int
             nested: InnerDataClass
 
-        self.assertEqual(
-            utils.clean(
-                TestDataClass(
-                    foo="1",
-                    bar=2,
-                    nested=InnerDataClass(
-                        inner_foo="3",
-                        inner_bar=4,
-                        inner_uuid=UUID("12345678123456781234567812345678"),
-                        inner_date=datetime(2025, 1, 1),
-                    ),
-                )
-            ),
-            {
-                "foo": "1",
-                "bar": 2,
-                "nested": {
-                    "inner_foo": "3",
-                    "inner_bar": 4,
-                    "inner_uuid": "12345678-1234-5678-1234-567812345678",
-                    "inner_date": datetime(2025, 1, 1),
-                    "inner_optional": None,
-                },
+        assert utils.clean(
+            TestDataClass(
+                foo="1",
+                bar=2,
+                nested=InnerDataClass(
+                    inner_foo="3",
+                    inner_bar=4,
+                    inner_uuid=UUID("12345678123456781234567812345678"),
+                    inner_date=datetime(2025, 1, 1),
+                ),
+            )
+        ) == {
+            "foo": "1",
+            "bar": 2,
+            "nested": {
+                "inner_foo": "3",
+                "inner_bar": 4,
+                "inner_uuid": "12345678-1234-5678-1234-567812345678",
+                "inner_date": datetime(2025, 1, 1),
+                "inner_optional": None,
             },
-        )
-
-
-class TestSizeLimitedDict(unittest.TestCase):
-    def test_size_limited_dict(self):
-        size = 10
-        values = utils.SizeLimitedDict(size, lambda _: -1)
-
-        for i in range(100):
-            values[i] = i
-
-            self.assertEqual(values[i], i)
-            self.assertEqual(len(values), i % size + 1)
-
-            if i % size == 0:
-                # old numbers should've been removed
-                self.assertIsNone(values.get(i - 1))
-                self.assertIsNone(values.get(i - 3))
-                self.assertIsNone(values.get(i - 5))
-                self.assertIsNone(values.get(i - 9))
+        }
diff --git a/posthog/utils.py b/posthog/utils.py
@@ -5,6 +5,7 @@
 from dataclasses import asdict, is_dataclass
 from datetime import date, datetime, timezone
 from decimal import Decimal
+from typing import Any, Optional
 from uuid import UUID
 
 import six
@@ -99,14 +100,35 @@ def _clean_dataclass(dataclass_):
     return data
 
 
-def _coerce_unicode(cmplx):
+def _coerce_unicode(cmplx: Any) -> Optional[str]:
+    """
+    In theory, this method is only called
+    after many isinstance checks are carried out in `utils.clean`.
+    When we supported Python 2 it was safe to call `decode` on a `str`
+    but in Python 3 that will throw.
+    So, we check if the input is bytes and only call `decode` in that case.
+
+    Previously we would always call `decode` on the input
+    That would throw an error.
+    Then we would call `decode` on the stringified error
+    That would throw an error.
+    And then we would return `None`
+
+    To avoid a breaking change, we can maintain the behavior
+    that anything which did not have `decode` in Python 2
+    returns None.
+    """
+    item = None
     try:
-        item = cmplx.decode("utf-8", "strict")
-    except AttributeError as exception:
-        item = ":".join(exception)
-        item.decode("utf-8", "strict")
+        if isinstance(cmplx, bytes):
+            item = cmplx.decode("utf-8", "strict")
+        elif isinstance(cmplx, str):
+            item = cmplx
+    except Exception as exception:
+        item = ":".join(map(str, exception.args))
         log.warning("Error decoding: %s", item)
         return None
+
     return item
 
 

diff --git a/posthog/version.py b/posthog/version.py
@@ -1,4 +1,4 @@
-VERSION = "4.2.0"
+VERSION = "4.2.1"
 
 if __name__ == "__main__":
     print(VERSION, end="")  # noqa: T201