diff --git a/ee/api/scim/test/test_users_api.py b/ee/api/scim/test/test_users_api.py index e5a645981a5c8..3879d668b3792 100644 --- a/ee/api/scim/test/test_users_api.py +++ b/ee/api/scim/test/test_users_api.py @@ -7,6 +7,7 @@ from ee.api.scim.auth import generate_scim_token from ee.api.test.base import APILicensedTest from ee.models.rbac.role import RoleMembership +from ee.models.scim_provisioned_user import SCIMProvisionedUser class TestSCIMUsersAPI(APILicensedTest): @@ -56,6 +57,13 @@ def test_users_list_filter_exact_match(self): OrganizationMembership.objects.create( user=user_a, organization=self.organization, level=OrganizationMembership.Level.MEMBER ) + SCIMProvisionedUser.objects.create( + user=user_a, + organization_domain=self.domain, + username="engineering@example.com", + identity_provider=SCIMProvisionedUser.IdentityProvider.OTHER, + active=True, + ) user_b = User.objects.create_user( email="alex@example.com", password=None, first_name="Alex", last_name="Other", is_email_verified=True @@ -63,6 +71,13 @@ def test_users_list_filter_exact_match(self): OrganizationMembership.objects.create( user=user_b, organization=self.organization, level=OrganizationMembership.Level.MEMBER ) + SCIMProvisionedUser.objects.create( + user=user_b, + organization_domain=self.domain, + username="alex@example.com", + identity_provider=SCIMProvisionedUser.IdentityProvider.OTHER, + active=True, + ) # Exact match should return only engineering@example.com response = self.client.get( @@ -129,9 +144,9 @@ def test_users_list_filter_unrecognized_returns_empty_list(self): def test_create_user(self): user_data = { "schemas": ["urn:ietf:params:scim:schemas:core:2.0:User"], - "userName": "newuser@example.com", + "userName": "Newuser@example.com", "name": {"givenName": "New", "familyName": "User"}, - "emails": [{"value": "newuser@example.com", "primary": True}], + "emails": [{"value": "Newuser@example.com", "primary": True}], "active": True, } @@ -141,11 +156,11 @@ def test_create_user(self): assert response.status_code == status.HTTP_201_CREATED data = response.json() - assert data["userName"] == "newuser@example.com" + assert data["userName"] == "Newuser@example.com" assert data["name"]["givenName"] == "New" assert data["name"]["familyName"] == "User" - # Verify user was created + # Verify user was created with lowercase email user = User.objects.get(email="newuser@example.com") assert user.first_name == "New" assert user.last_name == "User" @@ -155,6 +170,12 @@ def test_create_user(self): membership = OrganizationMembership.objects.get(user=user, organization=self.organization) assert membership.level == OrganizationMembership.Level.MEMBER + # Verify SCIM provisioned user record was created + scim_user = SCIMProvisionedUser.objects.get(user=user, organization_domain=self.domain) + assert scim_user.username == "Newuser@example.com" + assert scim_user.active is True + assert scim_user.identity_provider == SCIMProvisionedUser.IdentityProvider.OTHER + def test_existing_user_is_added_to_org(self): # Create user in different org other_org = Organization.objects.create(name="Other Org") @@ -184,9 +205,11 @@ def test_existing_user_is_added_to_org(self): assert OrganizationMembership.objects.filter(user=existing_user, organization=self.organization).exists() assert OrganizationMembership.objects.filter(user=existing_user, organization=other_org).exists() - def test_repeated_post_does_not_create_duplicate_user(self): - # In case the IdP failed to match user by id, it can send POST request to create a new user. - # The user should be merged with existing one by email, not create a duplicate. + # Verify SCIM provisioned user record was created for this domain + scim_user = SCIMProvisionedUser.objects.get(user=existing_user, organization_domain=self.domain) + assert scim_user.active is True + + def test_repeated_post_returns_409_for_already_provisioned_user(self): user_data_first = { "schemas": ["urn:ietf:params:scim:schemas:core:2.0:User"], "userName": "repeat@example.com", @@ -202,7 +225,7 @@ def test_repeated_post_does_not_create_duplicate_user(self): assert response.status_code == status.HTTP_201_CREATED first_user = User.objects.get(email="repeat@example.com") - # IdP sends POST request again with same email + # IdP sends POST request again with same email - should fail with 409 user_data_second = { "schemas": ["urn:ietf:params:scim:schemas:core:2.0:User"], "userName": "repeat@example.com", @@ -215,14 +238,14 @@ def test_repeated_post_does_not_create_duplicate_user(self): f"/scim/v2/{self.domain.id}/Users", data=user_data_second, content_type="application/scim+json" ) - assert response.status_code == status.HTTP_201_CREATED + assert response.status_code == status.HTTP_409_CONFLICT # Should NOT create duplicate user assert User.objects.filter(email="repeat@example.com").count() == 1 - # User should be updated with new data from second POST + # User should NOT be updated (still has first POST data) first_user.refresh_from_db() - assert first_user.first_name == "Second" + assert first_user.first_name == "First" assert first_user.last_name == "Time" # User should have only one membership @@ -261,6 +284,14 @@ def test_deactivate_user(self): OrganizationMembership.objects.create( user=user, organization=self.organization, level=OrganizationMembership.Level.MEMBER ) + # Create SCIM provisioned user record + SCIMProvisionedUser.objects.create( + user=user, + organization_domain=self.domain, + username="deactivate@example.com", + identity_provider=SCIMProvisionedUser.IdentityProvider.OTHER, + active=True, + ) patch_data = { "schemas": ["urn:ietf:params:scim:api:messages:2.0:PatchOp"], @@ -280,6 +311,10 @@ def test_deactivate_user(self): user.refresh_from_db() assert user.is_active is True # User is still active globally + # Verify SCIM provisioned user record still exists but is marked inactive + scim_user = SCIMProvisionedUser.objects.get(user=user, organization_domain=self.domain) + assert scim_user.active is False + def test_delete_user(self): user = User.objects.create_user( email="delete@example.com", password=None, first_name="Delete", is_email_verified=True @@ -287,6 +322,14 @@ def test_delete_user(self): OrganizationMembership.objects.create( user=user, organization=self.organization, level=OrganizationMembership.Level.MEMBER ) + # Create SCIM provisioned user record + SCIMProvisionedUser.objects.create( + user=user, + organization_domain=self.domain, + username="delete@example.com", + identity_provider=SCIMProvisionedUser.IdentityProvider.OTHER, + active=True, + ) response = self.client.delete(f"/scim/v2/{self.domain.id}/Users/{user.id}") @@ -295,6 +338,9 @@ def test_delete_user(self): # Verify membership was removed assert not OrganizationMembership.objects.filter(user=user, organization=self.organization).exists() + # Verify SCIM provisioned user record was deleted + assert not SCIMProvisionedUser.objects.filter(user=user, organization_domain=self.domain).exists() + def test_put_user(self): user = User.objects.create_user( email="old@example.com", password=None, first_name="Old", last_name="Name", is_email_verified=True @@ -302,6 +348,14 @@ def test_put_user(self): OrganizationMembership.objects.create( user=user, organization=self.organization, level=OrganizationMembership.Level.MEMBER ) + # Create SCIM provisioned user record + SCIMProvisionedUser.objects.create( + user=user, + organization_domain=self.domain, + username="old@example.com", + identity_provider=SCIMProvisionedUser.IdentityProvider.OTHER, + active=True, + ) put_data = { "schemas": ["urn:ietf:params:scim:schemas:core:2.0:User"], @@ -321,6 +375,11 @@ def test_put_user(self): assert user.last_name == "User" assert user.email == "put@example.com" + # Verify SCIM provisioned user was updated + scim_user = SCIMProvisionedUser.objects.get(user=user, organization_domain=self.domain) + assert scim_user.username == "put@example.com" + assert scim_user.active is True + def test_put_user_not_found(self): put_data = { "schemas": ["urn:ietf:params:scim:schemas:core:2.0:User"], diff --git a/ee/api/scim/user.py b/ee/api/scim/user.py index 26a4dc07532d2..85d9d0936d863 100644 --- a/ee/api/scim/user.py +++ b/ee/api/scim/user.py @@ -10,6 +10,11 @@ from posthog.models.organization_domain import OrganizationDomain from ee.models.rbac.role import RoleMembership +from ee.models.scim_provisioned_user import SCIMProvisionedUser + + +class SCIMUserConflict(Exception): + """User is already SCIM-provisioned for this organization domain.""" class PostHogSCIMUser(SCIMUser): @@ -54,7 +59,10 @@ def name(self) -> dict: @property def user_name(self) -> str: - return self.obj.email + scim_user = SCIMProvisionedUser.objects.filter( + user=self.obj, organization_domain=self._organization_domain + ).first() + return scim_user.username if scim_user else self.obj.email @property def active(self) -> bool: @@ -124,7 +132,12 @@ def to_dict(self) -> dict: return base_dict @classmethod - def from_dict(cls, data: dict, organization_domain: OrganizationDomain) -> "PostHogSCIMUser": + def from_dict( + cls, + data: dict, + organization_domain: OrganizationDomain, + identity_provider: SCIMProvisionedUser.IdentityProvider = SCIMProvisionedUser.IdentityProvider.OTHER, + ) -> "PostHogSCIMUser": """ Create or update a User from SCIM data. """ @@ -135,10 +148,16 @@ def from_dict(cls, data: dict, organization_domain: OrganizationDomain) -> "Post name_data = data.get("name", {}) first_name = name_data.get("givenName", "") last_name = name_data.get("familyName", "") + user_name = data.get("userName", email) + active = data.get("active", True) with transaction.atomic(): user = User.objects.filter(email__iexact=email).first() + # Check if already SCIM-provisioned for this org domain + if user and SCIMProvisionedUser.objects.filter(user=user, organization_domain=organization_domain).exists(): + raise SCIMUserConflict() + if user: if first_name: user.first_name = first_name @@ -164,6 +183,16 @@ def from_dict(cls, data: dict, organization_domain: OrganizationDomain) -> "Post user.current_team = organization_domain.organization.teams.first() user.save() + SCIMProvisionedUser.objects.update_or_create( + user=user, + organization_domain=organization_domain, + defaults={ + "identity_provider": identity_provider, + "username": user_name, + "active": active, + }, + ) + return cls(user, organization_domain) def put(self, data: dict) -> None: @@ -174,6 +203,8 @@ def put(self, data: dict) -> None: """ name_data = data.get("name", {}) email = self._extract_email_from_value(data.get("emails", [])) + user_name = data.get("userName", email) + is_active = data.get("active", True) if not email: raise ValueError("Email is required") @@ -189,19 +220,46 @@ def put(self, data: dict) -> None: self.obj.email = email self.obj.save() + SCIMProvisionedUser.objects.update_or_create( + user=self.obj, + organization_domain=self._organization_domain, + defaults={ + "username": user_name, + "active": is_active, + "identity_provider": SCIMProvisionedUser.IdentityProvider.OTHER, + }, + ) + # Deactivate user if active is false - is_active = data.get("active", True) if not is_active: - self.delete() + self.deactivate() + + def deactivate(self) -> None: + """ + Deactivate user by removing their membership and marking SCIM record as inactive. + """ + OrganizationMembership.objects.filter( + user=self.obj, organization=self._organization_domain.organization + ).delete() + + SCIMProvisionedUser.objects.filter( + user=self.obj, + organization_domain=self._organization_domain, + ).update(active=False) def delete(self) -> None: """ - Deactivate user by removing their membership from this organization. + Delete user by removing their membership and SCIM provisioned user record. """ OrganizationMembership.objects.filter( user=self.obj, organization=self._organization_domain.organization ).delete() + SCIMProvisionedUser.objects.filter( + user=self.obj, + organization_domain=self._organization_domain, + ).delete() + def handle_replace(self, path: AttrPath, value: Union[str, list, dict], operation: dict) -> None: """ Handle SCIM PATCH replace operations (called by django-scim2 handle_operations). @@ -216,7 +274,18 @@ def handle_replace(self, path: AttrPath, value: Union[str, list, dict], operatio with transaction.atomic(): if attr_name == "active": if not value: - self.delete() + self.deactivate() + return + else: + SCIMProvisionedUser.objects.update_or_create( + user=self.obj, + organization_domain=self._organization_domain, + defaults={ + "active": True, + "username": self.obj.email, + "identity_provider": SCIMProvisionedUser.IdentityProvider.OTHER, + }, + ) elif attr_name == "name": if sub_attr == "givenName" and isinstance(value, str): @@ -239,6 +308,17 @@ def handle_replace(self, path: AttrPath, value: Union[str, list, dict], operatio if email: self.obj.email = email + elif attr_name == "userName" and isinstance(value, str): + SCIMProvisionedUser.objects.update_or_create( + user=self.obj, + organization_domain=self._organization_domain, + defaults={ + "username": value, + "active": True, + "identity_provider": SCIMProvisionedUser.IdentityProvider.OTHER, + }, + ) + self.obj.save() def handle_add(self, path: AttrPath, value: Union[str, list, dict], operation: dict) -> None: @@ -257,6 +337,16 @@ def handle_add(self, path: AttrPath, value: Union[str, list, dict], operation: d defaults={"level": OrganizationMembership.Level.MEMBER}, ) + SCIMProvisionedUser.objects.update_or_create( + user=self.obj, + organization_domain=self._organization_domain, + defaults={ + "active": True, + "username": self.obj.email, + "identity_provider": SCIMProvisionedUser.IdentityProvider.OTHER, + }, + ) + elif attr_name == "name": if sub_attr == "givenName" and isinstance(value, str): self.obj.first_name = value @@ -280,6 +370,17 @@ def handle_add(self, path: AttrPath, value: Union[str, list, dict], operation: d self.obj.email = email self.obj.save() + elif attr_name == "userName" and isinstance(value, str): + SCIMProvisionedUser.objects.update_or_create( + user=self.obj, + organization_domain=self._organization_domain, + defaults={ + "username": value, + "active": True, + "identity_provider": SCIMProvisionedUser.IdentityProvider.OTHER, + }, + ) + def handle_remove(self, path: AttrPath, value: Union[str, list, dict], operation: dict) -> None: """ Handle SCIM PATCH remove operations (called by django-scim2 handle_operations). @@ -290,7 +391,8 @@ def handle_remove(self, path: AttrPath, value: Union[str, list, dict], operation with transaction.atomic(): if attr_name == "active": - self.delete() + self.deactivate() + return elif attr_name == "name": if sub_attr == "givenName": diff --git a/ee/api/scim/utils.py b/ee/api/scim/utils.py index 6f47ad7793dcc..b14ccf762e34a 100644 --- a/ee/api/scim/utils.py +++ b/ee/api/scim/utils.py @@ -1,5 +1,9 @@ +from rest_framework.request import Request + from posthog.models.organization_domain import OrganizationDomain +from ee.models.scim_provisioned_user import SCIMProvisionedUser + from .auth import generate_scim_token @@ -47,3 +51,21 @@ def get_scim_base_url(domain: OrganizationDomain, request=None) -> str: base_url = settings.SITE_URL return f"{base_url}/scim/v2/{domain.id}" + + +def detect_identity_provider(request: Request) -> SCIMProvisionedUser.IdentityProvider: + """ + Detect identity provider from request User-Agent header. + """ + user_agent = request.META.get("HTTP_USER_AGENT", "").lower() + + if "okta" in user_agent: + return SCIMProvisionedUser.IdentityProvider.OKTA + elif "entra" in user_agent or "microsoft" in user_agent: + return SCIMProvisionedUser.IdentityProvider.ENTRA_ID + elif "google" in user_agent: + return SCIMProvisionedUser.IdentityProvider.GOOGLE + elif "onelogin" in user_agent: + return SCIMProvisionedUser.IdentityProvider.ONELOGIN + + return SCIMProvisionedUser.IdentityProvider.OTHER diff --git a/ee/api/scim/views.py b/ee/api/scim/views.py index 29f62865ac644..e35edd7be4e55 100644 --- a/ee/api/scim/views.py +++ b/ee/api/scim/views.py @@ -13,6 +13,7 @@ from rest_framework.request import Request from rest_framework.response import Response from rest_framework.views import APIView +from scim2_filter_parser.transpilers.django_q_object import get_query from posthog.exceptions_capture import capture_exception from posthog.models import User @@ -20,11 +21,12 @@ from ee.api.scim.auth import SCIMBearerTokenAuthentication from ee.api.scim.group import PostHogSCIMGroup -from ee.api.scim.user import PostHogSCIMUser +from ee.api.scim.user import PostHogSCIMUser, SCIMUserConflict +from ee.api.scim.utils import detect_identity_provider from ee.models.rbac.role import Role +from ee.models.scim_provisioned_user import SCIMProvisionedUser SCIM_USER_ATTR_MAP = { - ("userName", None, None): "email", ("emails", "value", None): "email", ("name", "familyName", None): "last_name", ("familyName", None, None): "last_name", @@ -82,9 +84,20 @@ class PostHogUserFilterQuery(UserFilterQuery): @classmethod def search(cls, filter_query: str, request: Request) -> QuerySet[User]: - raw_queryset = super().search(filter_query, request) - # Filter results to only include users from the specified organization org_domain = cast(OrganizationDomain, request.auth) + + if "userName" in filter_query: + # userName is stored in SCIMProvisionedUser, not User + # UserFilterQuery only queries User model, so use scim2-filter-parser directly + scim_attr_map = {("userName", None, None): "username"} + q_obj = get_query(filter_query, scim_attr_map) + scim_user_ids = SCIMProvisionedUser.objects.filter( + q_obj, + organization_domain=org_domain, + ).values_list("user_id", flat=True) + return User.objects.filter(id__in=scim_user_ids) + + raw_queryset = super().search(filter_query, request) user_ids = [user.id for user in raw_queryset] return User.objects.filter( id__in=user_ids, @@ -142,9 +155,16 @@ def get(self, request: Request, domain_id: str) -> Response: def post(self, request: Request, domain_id: str) -> Response: organization_domain = cast(OrganizationDomain, request.auth) + try: - scim_user = PostHogSCIMUser.from_dict(request.data, organization_domain) + identity_provider = detect_identity_provider(request) + scim_user = PostHogSCIMUser.from_dict(request.data, organization_domain, identity_provider) return Response(scim_user.to_dict(), status=status.HTTP_201_CREATED) + except SCIMUserConflict: + return Response( + {"schemas": [constants.SchemaURI.ERROR], "status": 409, "detail": "User already exists"}, + status=status.HTTP_409_CONFLICT, + ) except ValueError as e: capture_exception( e, diff --git a/ee/migrations/0032_scimprovisioneduser_and_more.py b/ee/migrations/0032_scimprovisioneduser_and_more.py new file mode 100644 index 0000000000000..5ec6fb453c92b --- /dev/null +++ b/ee/migrations/0032_scimprovisioneduser_and_more.py @@ -0,0 +1,73 @@ +# Generated by Django 4.2.26 on 2025-11-28 14:03 + +import django.db.models.deletion +from django.conf import settings +from django.db import migrations, models + +import posthog.models.utils + + +class Migration(migrations.Migration): + dependencies = [ + ("posthog", "0923_add_quick_filters"), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ("ee", "0031_agentartifact"), + ] + + operations = [ + migrations.CreateModel( + name="SCIMProvisionedUser", + fields=[ + ("updated_at", models.DateTimeField(auto_now=True, null=True)), + ( + "id", + models.UUIDField( + default=posthog.models.utils.uuid7, editable=False, primary_key=True, serialize=False + ), + ), + ( + "identity_provider", + models.CharField( + choices=[ + ("okta", "Okta"), + ("entra_id", "Microsoft Entra ID"), + ("google", "Google Workspace"), + ("onelogin", "OneLogin"), + ("other", "Other"), + ], + max_length=50, + ), + ), + ("username", models.CharField(max_length=255)), + ("active", models.BooleanField(default=True)), + ("created_at", models.DateTimeField(auto_now_add=True)), + ( + "organization_domain", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="scim_provisioned_users", + to="posthog.organizationdomain", + ), + ), + ( + "user", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="scim_provisions", + to=settings.AUTH_USER_MODEL, + ), + ), + ], + options={ + "indexes": [ + models.Index(fields=["organization_domain", "username"], name="ee_scimprov_organiz_6d83ff_idx") + ], + }, + ), + migrations.AddConstraint( + model_name="scimprovisioneduser", + constraint=models.UniqueConstraint( + fields=("user", "organization_domain"), name="unique_user_organization_domain" + ), + ), + ] diff --git a/ee/migrations/max_migration.txt b/ee/migrations/max_migration.txt index 5caec67acea57..de04df1f5d3a2 100644 --- a/ee/migrations/max_migration.txt +++ b/ee/migrations/max_migration.txt @@ -1 +1 @@ -0031_agentartifact +0032_scimprovisioneduser_and_more diff --git a/ee/models/__init__.py b/ee/models/__init__.py index 0b0959fbd8000..aa6215723966e 100644 --- a/ee/models/__init__.py +++ b/ee/models/__init__.py @@ -13,6 +13,7 @@ from .property_definition import EnterprisePropertyDefinition from .rbac.access_control import AccessControl from .rbac.role import Role, RoleMembership +from .scim_provisioned_user import SCIMProvisionedUser from .session_summaries import SingleSessionSummary __all__ = [ @@ -30,5 +31,6 @@ "License", "Role", "RoleMembership", + "SCIMProvisionedUser", "SingleSessionSummary", ] diff --git a/ee/models/scim_provisioned_user.py b/ee/models/scim_provisioned_user.py new file mode 100644 index 0000000000000..08147e9789ed4 --- /dev/null +++ b/ee/models/scim_provisioned_user.py @@ -0,0 +1,34 @@ +from django.db import models + +from posthog.models.utils import UpdatedMetaFields, UUIDModel + + +class SCIMProvisionedUser(UUIDModel, UpdatedMetaFields): + class IdentityProvider(models.TextChoices): + OKTA = "okta", "Okta" + ENTRA_ID = "entra_id", "Microsoft Entra ID" + GOOGLE = "google", "Google Workspace" + ONELOGIN = "onelogin", "OneLogin" + OTHER = "other", "Other" + + user = models.ForeignKey("posthog.User", on_delete=models.CASCADE, related_name="scim_provisions") + organization_domain = models.ForeignKey( + "posthog.OrganizationDomain", on_delete=models.CASCADE, related_name="scim_provisioned_users" + ) + + identity_provider = models.CharField(max_length=50, choices=IdentityProvider.choices) + username = models.CharField(max_length=255) + active = models.BooleanField(default=True) + + created_at = models.DateTimeField(auto_now_add=True) + + class Meta: + constraints = [ + models.UniqueConstraint( + fields=["user", "organization_domain"], + name="unique_user_organization_domain", + ) + ] + indexes = [ + models.Index(fields=["organization_domain", "username"]), + ]