Skip to content

Commit c02c5d5

Browse files
committed
some test fixes
1 parent 8f6626c commit c02c5d5

File tree

5 files changed

+23
-11
lines changed

5 files changed

+23
-11
lines changed

posthog/models/cohort/cohort.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
from posthog.models.property import Property, PropertyGroup
2828
from posthog.models.utils import RootTeamManager, RootTeamMixin, sane_repr
2929
from posthog.person_db_router import PERSONS_DB_FOR_WRITE
30-
from posthog.personhog_client.gate import use_personhog
3130
from posthog.settings.base_variables import TEST
3231

3332
if TYPE_CHECKING:
@@ -391,6 +390,8 @@ def _get_uuids_for_distinct_ids_batch(self, distinct_ids: list[str], team_id: in
391390
# You're going to be tempted to exclude people already in the cohort, but that's not only NOT
392391
# necessary, it also leads to query timeouts. insert_users_list_by_uuid efficiently ensures we
393392
# don't insert people who are already in the cohort.
393+
from posthog.personhog_client.gate import use_personhog
394+
394395
if use_personhog():
395396
persons = get_persons_by_distinct_ids(team_id, list(distinct_ids))
396397
return [str(person.uuid) for person in persons]

posthog/models/person/test/test_util_personhog_routing.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -997,15 +997,12 @@ def test_single_person_multiple_distinct_ids(self):
997997
result = get_persons_mapped_by_distinct_id(self.team.pk, ["did-1", "did-2"])
998998

999999
# Both distinct_ids should map to the same person
1000+
assert len(result) == 2
10001001
assert str(result["did-1"].uuid) == str(person.uuid)
1002+
assert str(result["did-2"].uuid) == str(person.uuid)
10011003
# Each entry should carry only the distinct_id that was used as the key
10021004
assert result["did-1"].distinct_ids == ["did-1"]
1003-
if self.personhog:
1004-
# Personhog path deduplicates by person_id, so only one entry
1005-
assert len(result) == 1
1006-
else:
1007-
assert len(result) == 2
1008-
assert result["did-2"].distinct_ids == ["did-2"]
1005+
assert result["did-2"].distinct_ids == ["did-2"]
10091006

10101007
def test_multiple_persons(self):
10111008
p1 = self._seed_person(team=self.team, distinct_ids=["alice"], properties={"name": "Alice"})

posthog/models/person/util.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,16 @@ def _fetch_persons_by_distinct_ids_via_personhog(
245245
"personhog_team_mismatch", operation="get_persons_by_distinct_ids", team_id=team_id, dropped=mismatched
246246
)
247247

248+
# The RPC returns one result per distinct_id, so the same person can
249+
# appear multiple times. Deduplicate by person_id to return unique persons.
250+
seen_person_ids: set[int] = set()
251+
unique_results = []
252+
for r in valid_results:
253+
if r.person.id not in seen_person_ids:
254+
seen_person_ids.add(r.person.id)
255+
unique_results.append(r)
256+
valid_results = unique_results
257+
248258
person_ids = [r.person.id for r in valid_results]
249259
if not person_ids:
250260
return []

posthog/personhog_client/fake_client.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -208,14 +208,17 @@ def get_persons_by_distinct_ids_in_team(
208208
) -> person_pb2.PersonsByDistinctIdsInTeamResponse:
209209
self.calls.append(_Call("get_persons_by_distinct_ids_in_team", request))
210210
results = []
211-
seen_person_ids: set[int] = set()
212211
for did in request.distinct_ids:
213212
person = self._persons_by_distinct_id.get((request.team_id, did))
214-
if person and person.id not in seen_person_ids:
215-
seen_person_ids.add(person.id)
213+
if person:
216214
results.append(person_pb2.PersonWithDistinctIds(distinct_id=did, person=person))
217215
return person_pb2.PersonsByDistinctIdsInTeamResponse(results=results)
218216

217+
# NOTE: the real RPC returns one result per requested distinct_id (no
218+
# deduplication by person). Callers that need unique persons (e.g.
219+
# _fetch_persons_by_distinct_ids_via_personhog) must deduplicate
220+
# themselves.
221+
219222
def get_distinct_ids_for_person(
220223
self, request: person_pb2.GetDistinctIdsForPersonRequest
221224
) -> person_pb2.GetDistinctIdsForPersonResponse:

posthog/queries/actor_base_query.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
from posthog.models.person.person import READ_DB_FOR_PERSONS
2222
from posthog.personhog_client.client import get_personhog_client
2323
from posthog.personhog_client.converters import proto_person_to_model
24-
from posthog.personhog_client.gate import use_personhog
2524
from posthog.personhog_client.metrics import (
2625
PERSONHOG_ROUTING_ERRORS_TOTAL,
2726
PERSONHOG_ROUTING_TOTAL,
@@ -308,6 +307,8 @@ def get_people(
308307
distinct_id_limit: int | None = 1000,
309308
) -> tuple[Union[QuerySet[Person], list[Person]], list[SerializedPerson]]:
310309
"""Get people from raw SQL results in data model and dict formats"""
310+
from posthog.personhog_client.gate import use_personhog
311+
311312
if use_personhog():
312313
try:
313314
persons = _fetch_people_via_personhog(team.pk, people_ids, distinct_id_limit)

0 commit comments

Comments
 (0)