@@ -133,6 +133,29 @@ def test_sequence_invalid_join_field_wrong_package(self) -> None:
133133 with self .assertRaisesRegex (ValueError , r"Error in both stack and integrations checks" ):
134134 rc .load_dict (bad_rule )
135135
def test_sequence_okta_missing_in_metadata_but_present_in_dataset(self) -> None:
    """Load a sequence rule whose okta subquery is declared only through its dataset.

    The metadata lists just the ``azure`` integration, while the third
    subquery filters on ``event.dataset == "okta.system"`` and joins on an
    ``okta.*`` field. The test succeeds when ``load_dict`` does not raise.
    """
    collection = RuleCollection()
    eql_sequence = """
    sequence with maxspan=30m
    [any where event.dataset == "azure.identity_protection"] by azure.identityprotection.properties.user_principal_name
    [any where event.dataset == "azure.auditlogs" and event.action == "Register device"] by azure.auditlogs.properties.initiated_by.user.userPrincipalName
    [authentication where event.dataset == "okta.system" and okta.event_type == "user.mfa.okta_verify.deny_push"] by okta.actor.id
    """
    # "okta" is deliberately absent from the integrations passed to mk_metadata.
    metadata_section = mk_metadata(["azure"], comments="Okta present via dataset only")
    rule_section = mk_rule(
        name="EQL sequence with okta dataset only",
        rule_id="3c4d5e77-2345-4f8d-9f72-1d8e5f3e5f13",
        description="Validate that dataset usage includes okta schema even if not in metadata.",
        risk_score=50,
        query=eql_sequence,
    )
    rule_contents = {"metadata": metadata_section, "rule": rule_section}
    # Expected to load cleanly: per the original author's note,
    # get_packaged_integrations includes packages parsed from datasets.
    collection.load_dict(rule_contents)
158+
136159 def test_sequence_across_integrations_valid (self ) -> None :
137160 """Sequence uses azure and crowdstrike datasets; each subquery validates against its own integration."""
138161 rc = RuleCollection ()
@@ -173,3 +196,23 @@ def test_sequence_across_integrations_invalid_crowdstrike_subquery_azure_field(s
173196 }
174197 with self .assertRaisesRegex (ValueError , r"Error in both stack and integrations checks" ):
175198 rc .load_dict (bad_rule )
199+
def test_sequence_datasetless_subquery_with_metadata_integration_valid(self) -> None:
    """Load a sequence rule whose first subquery has no ``event.dataset`` filter.

    The first subquery references only ``azure.*`` fields and the second
    filters on the ``azure.auditlogs`` dataset; the metadata lists the
    ``azure`` integration. The test succeeds when ``load_dict`` does not raise.
    """
    collection = RuleCollection()
    eql_sequence = """
    sequence with maxspan=30m
    [any where azure.identityprotection.properties.user_principal_name != null] by azure.identityprotection.properties.user_principal_name
    [any where event.dataset == "azure.auditlogs"] by azure.auditlogs.properties.initiated_by.user.userPrincipalName
    """
    metadata_section = mk_metadata(["azure"], comments="Datasetless subquery with azure fields")
    rule_section = mk_rule(
        name="EQL sequence datasetless azure subquery",
        rule_id="3d4e5f88-3456-4f8d-9f72-1d8e5f3e5f14",
        description="Datasetless azure subquery relies on metadata/field inference for package schema.",
        risk_score=30,
        query=eql_sequence,
    )
    # No assertion is needed: an invalid rule would raise during load_dict.
    collection.load_dict({"metadata": metadata_section, "rule": rule_section})
0 commit comments