-
Notifications
You must be signed in to change notification settings - Fork 57
fix: opinionated setup and clean fn fix #240
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 12 commits
dceb49d
3586a1c
cea7d2d
9926ad4
527a118
9caa73e
d53f8f4
85acc97
404058f
c5746d0
9e25657
fe866e0
f8ff7a8
d8ca3e8
cad50e2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,33 @@ | ||
| import unittest | ||
| from dataclasses import dataclass | ||
| from datetime import date, datetime, timedelta | ||
| from decimal import Decimal | ||
| from typing import Optional | ||
| from uuid import UUID | ||
|
|
||
| import six | ||
| from dateutil.tz import tzutc | ||
| from parameterized import parameterized | ||
| from pydantic import BaseModel | ||
| from pydantic.v1 import BaseModel as BaseModelV1 | ||
|
|
||
| from posthog import utils | ||
|
|
||
|
|
||
| class TestSizeLimitedDict(unittest.TestCase): | ||
| @parameterized.expand([(10, 100), (5, 20), (20, 200)]) | ||
| def test_size_limited_dict(self, size: int, iterations: int) -> None: | ||
| values = utils.SizeLimitedDict(size, lambda _: -1) | ||
|
|
||
| for i in range(iterations): | ||
| values[i] = i | ||
|
|
||
| assert values[i] == i | ||
| assert len(values) == i % size + 1 | ||
|
|
||
| if i % size == 0: | ||
| # old numbers should've been removed | ||
| self.assertIsNone(values.get(i - 1)) | ||
| self.assertIsNone(values.get(i - 3)) | ||
| self.assertIsNone(values.get(i - 5)) | ||
| self.assertIsNone(values.get(i - 9)) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,6 +7,7 @@ | |
|
|
||
| import six | ||
| from dateutil.tz import tzutc | ||
| from parameterized import parameterized | ||
| from pydantic import BaseModel | ||
| from pydantic.v1 import BaseModel as BaseModelV1 | ||
|
|
||
|
|
@@ -17,17 +18,29 @@ | |
|
|
||
|
|
||
| class TestUtils(unittest.TestCase): | ||
| @parameterized.expand( | ||
| [ | ||
| ("naive datetime should be naive", True), | ||
| ("timezone-aware datetime should not be naive", False), | ||
| ] | ||
| ) | ||
| def test_is_naive(self, _name: str, expected_naive: bool): | ||
| if expected_naive: | ||
| dt = datetime.now() # naive datetime | ||
| else: | ||
| dt = datetime.now(tz=tzutc()) # timezone-aware datetime | ||
|
|
||
| assert utils.is_naive(dt) is expected_naive | ||
|
|
||
| def test_timezone_utils(self): | ||
| now = datetime.now() | ||
| utcnow = datetime.now(tz=tzutc()) | ||
| self.assertTrue(utils.is_naive(now)) | ||
| self.assertFalse(utils.is_naive(utcnow)) | ||
|
|
||
| fixed = utils.guess_timezone(now) | ||
| self.assertFalse(utils.is_naive(fixed)) | ||
| assert utils.is_naive(fixed) is False | ||
|
|
||
| shouldnt_be_edited = utils.guess_timezone(utcnow) | ||
| self.assertEqual(utcnow, shouldnt_be_edited) | ||
| assert utcnow == shouldnt_be_edited | ||
|
|
||
| def test_clean(self): | ||
| simple = { | ||
|
|
@@ -54,39 +67,37 @@ def test_clean(self): | |
| pre_clean_keys = combined.keys() | ||
|
|
||
| utils.clean(combined) | ||
| self.assertEqual(combined.keys(), pre_clean_keys) | ||
| assert combined.keys() == pre_clean_keys | ||
|
|
||
| # test UUID separately, as the UUID object doesn't equal its string representation according to Python | ||
| self.assertEqual( | ||
| utils.clean(UUID("12345678123456781234567812345678")), | ||
| "12345678-1234-5678-1234-567812345678", | ||
| ) | ||
| assert utils.clean(UUID("12345678123456781234567812345678")) == "12345678-1234-5678-1234-567812345678" | ||
|
|
||
| def test_clean_with_dates(self): | ||
| dict_with_dates = { | ||
| "birthdate": date(1980, 1, 1), | ||
| "registration": datetime.now(tz=tzutc()), | ||
| } | ||
| self.assertEqual(dict_with_dates, utils.clean(dict_with_dates)) | ||
| assert dict_with_dates == utils.clean(dict_with_dates) | ||
|
|
||
| def test_bytes(self): | ||
| if six.PY3: | ||
| item = bytes(10) | ||
| else: | ||
| item = bytearray(10) | ||
|
|
||
| item = bytes(10) | ||
| utils.clean(item) | ||
| assert utils.clean(item) == "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" | ||
|
|
||
| def test_clean_fn(self): | ||
| cleaned = utils.clean({"fn": lambda x: x, "number": 4}) | ||
| self.assertEqual(cleaned["number"], 4) | ||
| # TODO: fixme, different behavior on python 2 and 3 | ||
| if "fn" in cleaned: | ||
| self.assertEqual(cleaned["fn"], None) | ||
|
|
||
| def test_remove_slash(self): | ||
| self.assertEqual("http://posthog.io", utils.remove_trailing_slash("http://posthog.io/")) | ||
| self.assertEqual("http://posthog.io", utils.remove_trailing_slash("http://posthog.io")) | ||
| assert cleaned == {"fn": None, "number": 4} | ||
|
|
||
| @parameterized.expand( | ||
| [ | ||
| ("http://posthog.io/", "http://posthog.io"), | ||
| ("http://posthog.io", "http://posthog.io"), | ||
| ("https://example.com/path/", "https://example.com/path"), | ||
| ("https://example.com/path", "https://example.com/path"), | ||
| ] | ||
| ) | ||
| def test_remove_slash(self, input_url, expected_url): | ||
| assert expected_url == utils.remove_trailing_slash(input_url) | ||
|
|
||
| def test_clean_pydantic(self): | ||
| class ModelV2(BaseModel): | ||
|
|
@@ -101,19 +112,26 @@ class ModelV1(BaseModelV1): | |
| class NestedModel(BaseModel): | ||
| foo: ModelV2 | ||
|
|
||
| self.assertEqual(utils.clean(ModelV2(foo="1", bar=2)), {"foo": "1", "bar": 2, "baz": None}) | ||
| self.assertEqual(utils.clean(ModelV1(foo=1, bar="2")), {"foo": 1, "bar": "2"}) | ||
| self.assertEqual( | ||
| utils.clean(NestedModel(foo=ModelV2(foo="1", bar=2, baz="3"))), | ||
| {"foo": {"foo": "1", "bar": 2, "baz": "3"}}, | ||
| ) | ||
| assert utils.clean(ModelV2(foo="1", bar=2)) == { | ||
| "foo": "1", | ||
| "bar": 2, | ||
| "baz": None, | ||
| } | ||
| assert utils.clean(ModelV1(foo=1, bar="2")) == {"foo": 1, "bar": "2"} | ||
| assert utils.clean(NestedModel(foo=ModelV2(foo="1", bar=2, baz="3"))) == { | ||
| "foo": {"foo": "1", "bar": 2, "baz": "3"} | ||
| } | ||
|
|
||
| def test_clean_pydantic_like_class(self) -> None: | ||
| class Dummy: | ||
| def model_dump(self, required_param): | ||
| pass | ||
| def model_dump(self, required_param: str) -> dict: | ||
| return {} | ||
|
|
||
| # Skips a class with a defined non-Pydantic `model_dump` method. | ||
| self.assertEqual(utils.clean({"test": Dummy()}), {}) | ||
| # previously python 2 code would cause an error while cleaning, | ||
| # and this entire object would be None, and we would log an error | ||
| # let's allow ourselves to clean `Dummy` as None, | ||
| # without blatting the `test` key | ||
| assert utils.clean({"test": Dummy()}) == {"test": None} | ||
|
Comment on lines
+125
to
+134
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this test was hiding that the behaviour was determined by throwing an exception in an exception handler |
||
|
|
||
| def test_clean_dataclass(self): | ||
| @dataclass | ||
|
|
@@ -130,47 +148,25 @@ class TestDataClass: | |
| bar: int | ||
| nested: InnerDataClass | ||
|
|
||
| self.assertEqual( | ||
| utils.clean( | ||
| TestDataClass( | ||
| foo="1", | ||
| bar=2, | ||
| nested=InnerDataClass( | ||
| inner_foo="3", | ||
| inner_bar=4, | ||
| inner_uuid=UUID("12345678123456781234567812345678"), | ||
| inner_date=datetime(2025, 1, 1), | ||
| ), | ||
| ) | ||
| ), | ||
| { | ||
| "foo": "1", | ||
| "bar": 2, | ||
| "nested": { | ||
| "inner_foo": "3", | ||
| "inner_bar": 4, | ||
| "inner_uuid": "12345678-1234-5678-1234-567812345678", | ||
| "inner_date": datetime(2025, 1, 1), | ||
| "inner_optional": None, | ||
| }, | ||
| assert utils.clean( | ||
| TestDataClass( | ||
| foo="1", | ||
| bar=2, | ||
| nested=InnerDataClass( | ||
| inner_foo="3", | ||
| inner_bar=4, | ||
| inner_uuid=UUID("12345678123456781234567812345678"), | ||
| inner_date=datetime(2025, 1, 1), | ||
| ), | ||
| ) | ||
| ) == { | ||
| "foo": "1", | ||
| "bar": 2, | ||
| "nested": { | ||
| "inner_foo": "3", | ||
| "inner_bar": 4, | ||
| "inner_uuid": "12345678-1234-5678-1234-567812345678", | ||
| "inner_date": datetime(2025, 1, 1), | ||
| "inner_optional": None, | ||
| }, | ||
| ) | ||
|
|
||
|
|
||
| class TestSizeLimitedDict(unittest.TestCase): | ||
| def test_size_limited_dict(self): | ||
| size = 10 | ||
| values = utils.SizeLimitedDict(size, lambda _: -1) | ||
|
|
||
| for i in range(100): | ||
| values[i] = i | ||
|
|
||
| self.assertEqual(values[i], i) | ||
| self.assertEqual(len(values), i % size + 1) | ||
|
|
||
| if i % size == 0: | ||
| # old numbers should've been removed | ||
| self.assertIsNone(values.get(i - 1)) | ||
| self.assertIsNone(values.get(i - 3)) | ||
| self.assertIsNone(values.get(i - 5)) | ||
| self.assertIsNone(values.get(i - 9)) | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,6 +5,7 @@ | |
| from dataclasses import asdict, is_dataclass | ||
| from datetime import date, datetime, timezone | ||
| from decimal import Decimal | ||
| from typing import Any, Optional | ||
| from uuid import UUID | ||
|
|
||
| import six | ||
|
|
@@ -99,14 +100,35 @@ def _clean_dataclass(dataclass_): | |
| return data | ||
|
|
||
|
|
||
| def _coerce_unicode(cmplx): | ||
| def _coerce_unicode(cmplx: Any) -> Optional[str]: | ||
| """ | ||
| In theory, this method is only called | ||
| after many isinstance checks are carried out in `utils.clean`. | ||
| When we supported Python 2 it was safe to call `decode` on a `str` | ||
| but in Python 3 that will throw. | ||
| So, we check if the input is bytes and only call `decode` in that case. | ||
|
|
||
| Previously we would always call `decode` on the input | ||
| That would throw an error. | ||
| Then we would call `decode` on the stringified error | ||
| That would throw an error. | ||
| And then we would return `None` | ||
|
|
||
| To avoid a breaking change, we can maintain the behavior | ||
| that anything which did not have `decode` in Python 2 | ||
| returns None. | ||
| """ | ||
| item = None | ||
| try: | ||
| item = cmplx.decode("utf-8", "strict") | ||
| except AttributeError as exception: | ||
| item = ":".join(exception) | ||
| item.decode("utf-8", "strict") | ||
| if isinstance(cmplx, bytes): | ||
| item = cmplx.decode("utf-8", "strict") | ||
| elif isinstance(cmplx, str): | ||
| item = cmplx | ||
| except Exception as exception: | ||
| item = ":".join(map(str, exception.args)) | ||
| log.warning("Error decoding: %s", item) | ||
| return None | ||
|
Comment on lines
+127
to
130
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not super convinced this logging behaviour is correct, but considering it was always throwing before now, it can't be too bad |
||
|
|
||
| return item | ||
|
|
||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,4 @@ | ||
| VERSION = "4.2.0" | ||
| VERSION = "4.2.1" | ||
|
|
||
| if __name__ == "__main__": | ||
| print(VERSION, end="") # noqa: T201 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
might want to go commit by commit
i tidied this up as well as fixing some assertions