11package com .datadog .appsec .gateway ;
22
33import static com .datadog .appsec .event .data .MapDataBundle .Builder .CAPACITY_0_2 ;
4+ import static com .datadog .appsec .event .data .MapDataBundle .Builder .CAPACITY_3_4 ;
45import static com .datadog .appsec .event .data .MapDataBundle .Builder .CAPACITY_6_10 ;
56import static com .datadog .appsec .gateway .AppSecRequestContext .DEFAULT_REQUEST_HEADERS_ALLOW_LIST ;
67import static com .datadog .appsec .gateway .AppSecRequestContext .REQUEST_HEADERS_ALLOW_LIST ;
78import static com .datadog .appsec .gateway .AppSecRequestContext .RESPONSE_HEADERS_ALLOW_LIST ;
9+ import static datadog .trace .api .UserIdCollectionMode .ANONYMIZATION ;
10+ import static datadog .trace .api .UserIdCollectionMode .DISABLED ;
11+ import static datadog .trace .api .UserIdCollectionMode .SDK ;
12+ import static datadog .trace .api .telemetry .LogCollector .SEND_TELEMETRY ;
13+ import static datadog .trace .util .Strings .toHexString ;
814
915import com .datadog .appsec .AppSecSystem ;
1016import com .datadog .appsec .api .security .ApiSecurityRequestSampler ;
2228import com .datadog .appsec .report .AppSecEventWrapper ;
2329import datadog .trace .api .Config ;
2430import datadog .trace .api .UserIdCollectionMode ;
25- import datadog .trace .api .function .TriFunction ;
2631import datadog .trace .api .gateway .Events ;
2732import datadog .trace .api .gateway .Flow ;
2833import datadog .trace .api .gateway .IGSpanInfo ;
4146import java .net .URISyntaxException ;
4247import java .nio .charset .Charset ;
4348import java .nio .charset .StandardCharsets ;
49+ import java .security .MessageDigest ;
50+ import java .security .NoSuchAlgorithmException ;
4451import java .util .ArrayList ;
4552import java .util .Arrays ;
4653import java .util .Collection ;
5158import java .util .Map ;
5259import java .util .Set ;
5360import java .util .concurrent .ConcurrentHashMap ;
61+ import java .util .concurrent .atomic .AtomicBoolean ;
5462import java .util .regex .Pattern ;
63+ import java .util .stream .Collectors ;
5564import org .slf4j .Logger ;
5665import org .slf4j .LoggerFactory ;
5766
@@ -65,6 +74,10 @@ public class GatewayBridge {
6574 private static final Pattern QUERY_PARAM_SPLITTER = Pattern .compile ("&" );
6675 private static final Map <String , List <String >> EMPTY_QUERY_PARAMS = Collections .emptyMap ();
6776
77+ private static final int HASH_SIZE_BYTES = 16 ; // 128 bits
78+ private static final String ANON_PREFIX = "anon_" ;
79+ private static final AtomicBoolean SHA_MISSING_REPORTED = new AtomicBoolean (false );
80+
6881 /** User tracking tags that will force the collection of request headers */
6982 private static final String [] USER_TRACKING_TAGS = {
7083 "appsec.events.users.login.success.track" , "appsec.events.users.login.failure.track"
@@ -91,7 +104,8 @@ public class GatewayBridge {
91104 private volatile DataSubscriberInfo ioNetUrlSubInfo ;
92105 private volatile DataSubscriberInfo ioFileSubInfo ;
93106 private volatile DataSubscriberInfo sessionIdSubInfo ;
94- private final ConcurrentHashMap <Address <String >, DataSubscriberInfo > userIdSubInfo =
107+ private volatile DataSubscriberInfo userIdSubInfo ;
108+ private final ConcurrentHashMap <String , DataSubscriberInfo > loginEventSubInfo =
95109 new ConcurrentHashMap <>();
96110
97111 public GatewayBridge (
@@ -134,11 +148,8 @@ public void init() {
134148 subscriptionService .registerCallback (EVENTS .networkConnection (), this ::onNetworkConnection );
135149 subscriptionService .registerCallback (EVENTS .fileLoaded (), this ::onFileLoaded );
136150 subscriptionService .registerCallback (EVENTS .requestSession (), this ::onRequestSession );
137- subscriptionService .registerCallback (EVENTS .userId (), this .onUserEvent (KnownAddresses .USER_ID ));
138- subscriptionService .registerCallback (
139- EVENTS .loginSuccess (), this .onUserEvent (KnownAddresses .LOGIN_SUCCESS ));
140- subscriptionService .registerCallback (
141- EVENTS .loginFailure (), this .onUserEvent (KnownAddresses .LOGIN_FAILURE ));
151+ subscriptionService .registerCallback (EVENTS .user (), this ::onUser );
152+ subscriptionService .registerCallback (EVENTS .loginEvent (), this ::onLoginEvent );
142153
143154 if (additionalIGEvents .contains (EVENTS .requestPathParams ())) {
144155 subscriptionService .registerCallback (EVENTS .requestPathParams (), this ::onRequestPathParams );
@@ -149,55 +160,157 @@ public void init() {
149160 }
150161 }
151162
152- private TriFunction <RequestContext , UserIdCollectionMode , String , Flow <Void >> onUserEvent (
153- final Address <String > address ) {
154- return (ctx_ , mode , userId ) -> {
155- final AppSecRequestContext ctx = ctx_ .getData (RequestContextSlot .APPSEC );
156- if (userId == null || ctx == null ) {
163+ private Flow <Void > onUser (
164+ final RequestContext ctx_ , final UserIdCollectionMode mode , final String originalUser ) {
165+ if (mode == DISABLED ) {
166+ return NoopFlow .INSTANCE ;
167+ }
168+ final String user = anonymizeUser (mode , originalUser );
169+ if (user == null ) {
170+ return NoopFlow .INSTANCE ;
171+ }
172+ final AppSecRequestContext ctx = ctx_ .getData (RequestContextSlot .APPSEC );
173+ if (ctx == null ) {
174+ return NoopFlow .INSTANCE ;
175+ }
176+ final TraceSegment segment = ctx_ .getTraceSegment ();
177+
178+ // span with ASM data
179+ segment .setTagTop (Tags .ASM_KEEP , true );
180+ segment .setTagTop (Tags .PROPAGATED_APPSEC , true );
181+
182+ // skip event if we have an SDK one
183+ if (mode != SDK ) {
184+ segment .setTagTop ("_dd.appsec.usr.id" , user );
185+ if (ctx .getUserIdSource () == SDK ) {
157186 return NoopFlow .INSTANCE ;
158187 }
159- final TraceSegment segment = ctx_ .getTraceSegment ();
160- // user id can be set by the SDK overriding the auto event, always update the segment
161- segment .setTagTop ("usr.id" , userId );
162- segment .setTagTop ("_dd.appsec.user.collection_mode" , mode .shortName ());
163- final List <Address <?>> addresses = new ArrayList <>(2 );
164- final boolean newUserId = !userId .equals (ctx .getUserId ());
165- if (newUserId ) {
166- // unlikely that multiple threads will update the value at the same time
167- ctx .setUserId (userId );
168- addresses .add (KnownAddresses .USER_ID );
169- }
170- if (address != KnownAddresses .USER_ID ) {
171- addresses .add (address );
172- }
173- if (addresses .isEmpty ()) {
174- // nothing to publish so short-circuit here
188+ }
189+
190+ // update span tags
191+ segment .setTagTop ("usr.id" , user );
192+ segment .setTagTop ("_dd.appsec.user.collection_mode" , mode .fullName ());
193+
194+ // update current context with new user id
195+ ctx .setUserIdSource (mode );
196+ final boolean newUserId = !user .equals (ctx .getUserId ());
197+ if (!newUserId ) {
198+ return NoopFlow .INSTANCE ;
199+ }
200+ ctx .setUserId (user );
201+
202+ // call waf if we have a new user id
203+ while (true ) {
204+ DataSubscriberInfo subInfo = userIdSubInfo ;
205+ if (subInfo == null ) {
206+ subInfo = producerService .getDataSubscribers (KnownAddresses .USER_ID );
207+ userIdSubInfo = subInfo ;
208+ }
209+ if (subInfo == null || subInfo .isEmpty ()) {
175210 return NoopFlow .INSTANCE ;
176211 }
177- final Address <?>[] addressArray = addresses .toArray (new Address [0 ]);
178- while (true ) {
179- DataSubscriberInfo subInfo =
180- userIdSubInfo .computeIfAbsent (
181- address , k -> producerService .getDataSubscribers (addressArray ));
182- if (subInfo == null || subInfo .isEmpty ()) {
183- return NoopFlow .INSTANCE ;
184- }
185- MapDataBundle .Builder bundle = new MapDataBundle .Builder (CAPACITY_0_2 );
186- if (newUserId ) {
187- bundle .add (KnownAddresses .USER_ID , userId );
188- }
189- if (address != KnownAddresses .USER_ID ) {
190- // we don't support null values for the address so we use an invalid placeholder here
191- bundle .add (address , "invalid" );
192- }
193- try {
194- GatewayContext gwCtx = new GatewayContext (false );
195- return producerService .publishDataEvent (subInfo , ctx , bundle .build (), gwCtx );
196- } catch (ExpiredSubscriberInfoException e ) {
197- userIdSubInfo .remove (address );
198- }
212+ DataBundle bundle =
213+ new MapDataBundle .Builder (CAPACITY_0_2 ).add (KnownAddresses .USER_ID , user ).build ();
214+ try {
215+ GatewayContext gwCtx = new GatewayContext (false );
216+ return producerService .publishDataEvent (subInfo , ctx , bundle , gwCtx );
217+ } catch (ExpiredSubscriberInfoException e ) {
218+ userIdSubInfo = null ;
219+ }
220+ }
221+ }
222+
223+ private Flow <Void > onLoginEvent (
224+ final RequestContext ctx_ ,
225+ final UserIdCollectionMode mode ,
226+ final String eventName ,
227+ final Boolean exists ,
228+ final String originalUser ,
229+ final Map <String , String > metadata ) {
230+ if (mode == DISABLED ) {
231+ return NoopFlow .INSTANCE ;
232+ }
233+ final String user = anonymizeUser (mode , originalUser );
234+ if (user == null ) {
235+ return NoopFlow .INSTANCE ;
236+ }
237+ final AppSecRequestContext ctx = ctx_ .getData (RequestContextSlot .APPSEC );
238+ if (ctx == null ) {
239+ return NoopFlow .INSTANCE ;
240+ }
241+ final TraceSegment segment = ctx_ .getTraceSegment ();
242+
243+ // span with ASM data
244+ segment .setTagTop (Tags .ASM_KEEP , true );
245+ segment .setTagTop (Tags .PROPAGATED_APPSEC , true );
246+
247+ // skip event if we have an SDK one
248+ if (mode != SDK ) {
249+ segment .setTagTop ("_dd.appsec.usr.login" , user );
250+ segment .setTagTop ("_dd.appsec.usr.id" , user );
251+ segment .setTagTop (
252+ "_dd.appsec.events.users." + eventName + ".auto.mode" , mode .fullName (), true );
253+ if (ctx .getUserLoginSource () == SDK ) {
254+ return NoopFlow .INSTANCE ;
255+ }
256+ } else {
257+ segment .setTagTop ("_dd.appsec.events.users." + eventName + ".sdk" , true , true );
258+ }
259+
260+ // update span tags
261+ segment .setTagTop ("appsec.events.users." + eventName + ".usr.login" , user , true );
262+ segment .setTagTop ("appsec.events.users." + eventName + ".usr.id" , user , true );
263+ segment .setTagTop ("appsec.events.users." + eventName + ".track" , true , true );
264+ if (exists != null ) {
265+ segment .setTagTop ("appsec.events.users." + eventName + ".usr.exists" , exists , true );
266+ }
267+ if (metadata != null && !metadata .isEmpty ()) {
268+ segment .setTagTop ("appsec.events.users." + eventName , metadata , true );
269+ }
270+
271+ // update current context with new user login
272+ ctx .setUserLoginSource (mode );
273+ final boolean newUserLogin = !user .equals (ctx .getUserLogin ());
274+ if (!newUserLogin ) {
275+ return NoopFlow .INSTANCE ;
276+ }
277+ ctx .setUserLogin (user );
278+
279+ // call waf if we have a new user login
280+ final List <Address <?>> addresses = new ArrayList <>(3 );
281+ addresses .add (KnownAddresses .USER_LOGIN );
282+ addresses .add (KnownAddresses .USER_ID );
283+ if (KnownAddresses .LOGIN_SUCCESS .getKey ().endsWith (eventName )) {
284+ addresses .add (KnownAddresses .LOGIN_SUCCESS );
285+ } else if (KnownAddresses .LOGIN_FAILURE .getKey ().endsWith (eventName )) {
286+ addresses .add (KnownAddresses .LOGIN_FAILURE );
287+ }
288+ final MapDataBundle .Builder bundleBuilder =
289+ new MapDataBundle .Builder (addresses .size () == 2 ? CAPACITY_0_2 : CAPACITY_3_4 );
290+ bundleBuilder .add (KnownAddresses .USER_ID , user );
291+ bundleBuilder .add (KnownAddresses .USER_LOGIN , user );
292+ if (addresses .size () == 3 ) {
293+ // we don't support null values for the address so we use an invalid placeholder here
294+ bundleBuilder .add (addresses .get (2 ), "invalid" );
295+ }
296+ final DataBundle bundle = bundleBuilder .build ();
297+ final String subInfoKey =
298+ addresses .stream ().map (Address ::getKey ).collect (Collectors .joining ("|" ));
299+ while (true ) {
300+ DataSubscriberInfo subInfo =
301+ loginEventSubInfo .computeIfAbsent (
302+ subInfoKey ,
303+ t -> producerService .getDataSubscribers (addresses .toArray (new Address [0 ])));
304+ if (subInfo == null || subInfo .isEmpty ()) {
305+ return NoopFlow .INSTANCE ;
306+ }
307+ try {
308+ GatewayContext gwCtx = new GatewayContext (false );
309+ return producerService .publishDataEvent (subInfo , ctx , bundle , gwCtx );
310+ } catch (ExpiredSubscriberInfoException e ) {
311+ loginEventSubInfo .remove (subInfoKey );
199312 }
200- };
313+ }
201314 }
202315
203316 private Flow <Void > onRequestSession (final RequestContext ctx_ , final String sessionId ) {
@@ -940,6 +1053,33 @@ private static int byteToDigit(byte b) {
9401053 return -1 ;
9411054 }
9421055
1056+ protected static String anonymizeUser (final UserIdCollectionMode mode , final String userId ) {
1057+ if (mode != ANONYMIZATION || userId == null ) {
1058+ return userId ;
1059+ }
1060+ MessageDigest digest ;
1061+ try {
1062+ // TODO avoid lookup a new instance every time
1063+ digest = MessageDigest .getInstance ("SHA-256" );
1064+ } catch (NoSuchAlgorithmException e ) {
1065+ if (!SHA_MISSING_REPORTED .getAndSet (true )) {
1066+ log .error (
1067+ SEND_TELEMETRY ,
1068+ "Missing SHA-256 digest, user collection in 'anon' mode cannot continue" ,
1069+ e );
1070+ }
1071+ return null ;
1072+ }
1073+ digest .update (userId .getBytes ());
1074+ byte [] hash = digest .digest ();
1075+ if (hash .length > HASH_SIZE_BYTES ) {
1076+ byte [] temp = new byte [HASH_SIZE_BYTES ];
1077+ System .arraycopy (hash , 0 , temp , 0 , temp .length );
1078+ hash = temp ;
1079+ }
1080+ return ANON_PREFIX + toHexString (hash );
1081+ }
1082+
9431083 private static class IGAppSecEventDependencies {
9441084
9451085 private static final Map <Address <?>, Collection <datadog .trace .api .gateway .EventType <?>>>
0 commit comments