Skip to content
Closed

Dlp #1909

Show file tree
Hide file tree
Changes from 30 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
bcc7dcb
add dlp
russelmrcl Jun 23, 2025
497a43c
add comments
russelmrcl Jun 23, 2025
f0119d3
wip
russelmrcl Jun 23, 2025
5f84eb6
refactor code
russelmrcl Jun 23, 2025
e2536fe
wip
russelmrcl Jun 23, 2025
6ebb9c4
wip
russelmrcl Jun 23, 2025
9d2e722
wip
russelmrcl Jun 23, 2025
699b226
docs: minor
predic8 Jun 23, 2025
7ffee70
wip
russelmrcl Jun 23, 2025
559a62b
wip
russelmrcl Jun 23, 2025
de3ed95
wip
russelmrcl Jun 23, 2025
4c3ff8f
wip: dlp
russelmrcl Jun 26, 2025
8cfb161
wip: dlp
russelmrcl Jun 26, 2025
b17b14a
wip: dlp
russelmrcl Jun 26, 2025
099ec70
add test
russelmrcl Jun 26, 2025
02d75ec
wip
russelmrcl Jun 30, 2025
2be464b
add strategy pattern
russelmrcl Jun 30, 2025
450204b
wip test
russelmrcl Jun 30, 2025
75b56d9
wip
russelmrcl Jun 30, 2025
8adedb9
wip
russelmrcl Jun 30, 2025
e7cc895
refactor code
russelmrcl Jun 30, 2025
0f673bc
add tests
russelmrcl Jun 30, 2025
6a0bed1
wip
russelmrcl Jun 30, 2025
469256e
resolve conversations
russelmrcl Jun 30, 2025
53bc2f8
convert to json parse
russelmrcl Jun 30, 2025
7d04071
Merge branch 'master' into dlp
christiangoerdes Jun 30, 2025
ff15443
add path
russelmrcl Jun 30, 2025
ea1e179
wip
russelmrcl Jun 30, 2025
dce7806
add mask
russelmrcl Jun 30, 2025
e2eaa48
wip
russelmrcl Jul 7, 2025
13711b1
wip
russelmrcl Jul 17, 2025
7aa7e95
wip
russelmrcl Jul 17, 2025
60a13df
fix
russelmrcl Jul 17, 2025
0bf1268
refactor
russelmrcl Jul 17, 2025
21c3489
wip
russelmrcl Jul 17, 2025
20ed318
refactor code
russelmrcl Jul 21, 2025
c20c70d
improve log
russelmrcl Jul 21, 2025
528cc14
add docs
russelmrcl Jul 21, 2025
8d75f7c
edit docs
russelmrcl Jul 21, 2025
c3ec071
wip
russelmrcl Aug 1, 2025
e9a6286
refactor code
russelmrcl Aug 1, 2025
8b1a278
refactor code
russelmrcl Aug 1, 2025
77022dd
refactor code
russelmrcl Aug 26, 2025
5008ee7
Merge branch 'master' into dlp
russelmrcl Sep 11, 2025
e58e11d
Merge branch 'master' into dlp
christiangoerdes Nov 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package com.predic8.membrane.core.interceptor.dlp;

import com.predic8.membrane.annot.MCAttribute;

public abstract class Action {

private String field;

/**
 * Applies this action to the given JSON text and returns the resulting text.
 *
 * NOTE(review): the concrete transformation is defined by subclasses that are
 * not visible here; presumably the configured {@code field} selects the part of
 * the document that is affected — confirm against the implementations.
 *
 * @param json the JSON document as a string
 * @return the string produced by the action
 */
public abstract String apply(String json);

/**
 * Returns the field expression configured for this action.
 *
 * @return the value last passed to {@code setField}, or {@code null} if none was set
 */
public String getField() {
    return this.field;
}

/**
 * Stores the field expression for this action. The value is kept as given;
 * no validation is performed here.
 *
 * @param field the field expression to store
 */
@MCAttribute
public void setField(String field) {
    this.field = field;
}
Comment on lines +13 to +16
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Add validation for the field parameter.

The setter should validate that the field is not empty and is a valid JSONPath expression to prevent runtime errors.

 @MCAttribute
 public void setField(String field) {
+    if (field != null && field.trim().isEmpty()) {
+        throw new IllegalArgumentException("field cannot be empty");
+    }
     this.field = field;
 }

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In core/src/main/java/com/predic8/membrane/core/interceptor/dlp/Action.java
around lines 15 to 18, the setField method lacks validation for the input
parameter. Add validation to ensure the field parameter is not null or empty and
verify it is a valid JSONPath expression before assigning it to the field
variable. If validation fails, throw an appropriate exception to prevent runtime
errors.

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package com.predic8.membrane.core.interceptor.dlp;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.*;

/**
 * Loads field risk mappings from a CSV classpath resource with the format:
 * field_name,description,risk_level
 * where risk_level should be one of: high, medium, low, unclassified.
 *
 * <p>The first line is always treated as a header and skipped. Blank lines and
 * lines starting with {@code #} are ignored. The first column is used as the
 * field name and the last column as the risk level; both are trimmed and
 * lower-cased. Unknown risk levels are logged as a warning but still stored.
 */
public class CsvFieldConfiguration implements FieldConfiguration {

    private static final Logger log = LoggerFactory.getLogger(CsvFieldConfiguration.class);

    // Risk levels that do not trigger a warning when encountered.
    private static final Set<String> ALLOWED_RISK_LEVELS = Set.of("high", "medium", "low", "unclassified");

    /**
     * Reads the given classpath resource and returns a map of field name to risk level.
     *
     * @param fileName classpath-relative name of the CSV resource
     * @return mutable map of lower-cased field name to lower-cased risk level;
     *         a later line overrides an earlier one for the same field
     * @throws NullPointerException if the resource cannot be found
     * @throws RuntimeException     if reading the resource fails
     */
    @Override
    public Map<String, String> getFields(String fileName) {
        try (InputStream inputStream = CsvFieldConfiguration.class.getClassLoader().getResourceAsStream(fileName)) {
            if (inputStream == null) {
                log.error("Could not find file: {}", fileName);
                // Exception type kept as-is so existing callers observe the same failure.
                throw new NullPointerException("InputStream is null. File not found: " + fileName);
            }

            Map<String, String> riskDict = new HashMap<>();

            // Decode explicitly as UTF-8 instead of relying on the platform default charset,
            // and let try-with-resources close the reader.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(inputStream, java.nio.charset.StandardCharsets.UTF_8))) {
                String line;
                boolean isHeader = true;

                while ((line = reader.readLine()) != null) {
                    line = line.trim();
                    if (isHeader) {
                        // The first physical line is the header, whatever it contains.
                        isHeader = false;
                        continue;
                    }
                    if (line.isEmpty() || line.startsWith("#")) {
                        continue;
                    }

                    // Limit -1 keeps trailing empty columns so the last column is always the risk level.
                    String[] parts = line.split(",", -1);
                    if (parts.length >= 2) {
                        String field = parts[0].trim().toLowerCase(Locale.ROOT);
                        String riskLevel = parts[parts.length - 1].trim().toLowerCase(Locale.ROOT);

                        if (!ALLOWED_RISK_LEVELS.contains(riskLevel)) {
                            log.warn("Unknown risk level '{}' for field '{}'", riskLevel, field);
                        }

                        riskDict.put(field, riskLevel);
                    } else {
                        log.warn("Invalid CSV line (too few columns): {}", line);
                    }
                }
            }

            return riskDict;
        } catch (IOException e) {
            throw new RuntimeException("Failed to load risk data from " + fileName, e);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package com.predic8.membrane.core.interceptor.dlp;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonFactoryBuilder;
import com.fasterxml.jackson.core.StreamReadConstraints;
import com.fasterxml.jackson.core.json.JsonReadFeature;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.predic8.membrane.core.http.Message;

import java.io.InputStream;
import java.util.*;

/**
 * Parses a message body as JSON and records a risk classification for every
 * leaf value, based on a dictionary of path or field name to risk level.
 */
public class DLPAnalyzer {

    private static final JsonFactory JSON_FACTORY = buildJsonFactory();

    private static final ObjectMapper MAPPER = new ObjectMapper(JSON_FACTORY);

    private final Map<String, String> riskDict;

    /**
     * @param riskDict mapping used for classification; an immutable copy is taken
     */
    public DLPAnalyzer(Map<String, String> riskDict) {
        this.riskDict = Map.copyOf(riskDict);
    }

    /**
     * Builds the JSON factory: trailing commas allowed, unescaped control
     * characters rejected, nesting depth capped at 64 and string length at 16 KiB.
     */
    private static JsonFactory buildJsonFactory() {
        StreamReadConstraints limits = StreamReadConstraints.builder()
                .maxNestingDepth(64)
                .maxStringLength(16 * 1024)
                .build();
        return new JsonFactoryBuilder()
                .configure(JsonReadFeature.ALLOW_TRAILING_COMMA, true)
                .configure(JsonReadFeature.ALLOW_UNESCAPED_CONTROL_CHARS, false)
                .streamReadConstraints(limits)
                .build();
    }

    /**
     * Reads the decoded body of the message as JSON and classifies its leaf values.
     *
     * @param msg message whose decoded body stream is parsed
     * @return report containing one recorded entry per leaf value visited
     * @throws RuntimeException wrapping any exception raised while reading or parsing
     */
    public RiskReport analyze(Message msg) {
        try (InputStream body = msg.getBodyAsStreamDecoded()) {
            JsonNode root = MAPPER.readTree(body);
            RiskReport result = new RiskReport();
            traverse(root, new ArrayDeque<>(), result);
            return result;
        } catch (Exception e) {
            throw new RuntimeException("Failed to analyse message", e);
        }
    }

    /**
     * Depth-first walk. Object members push their name onto the path; array
     * elements are visited with the path unchanged; every other node is recorded
     * under the lower-cased, dot-joined path.
     */
    private void traverse(JsonNode node, Deque<String> path, RiskReport report) {
        if (node.isObject()) {
            Iterator<String> names = node.fieldNames();
            while (names.hasNext()) {
                String name = names.next();
                path.addLast(name);
                traverse(node.get(name), path, report);
                path.removeLast();
            }
            return;
        }

        if (node.isArray()) {
            for (JsonNode element : node) {
                traverse(element, path, report);
            }
            return;
        }

        String fullPath = String.join(".", path).toLowerCase(Locale.ROOT);
        String simpleName = "";
        if (!path.isEmpty()) {
            simpleName = path.getLast().toLowerCase(Locale.ROOT);
        }
        report.recordField(fullPath, classify(fullPath, simpleName));
    }

    /**
     * Looks up the risk level by full path first, then by the last path segment,
     * falling back to "unclassified". The result is lower-cased.
     */
    private String classify(String fullPath, String simpleName) {
        String level = riskDict.get(fullPath);
        if (level == null) {
            level = riskDict.get(simpleName);
        }
        if (level == null) {
            level = "unclassified";
        }
        return level.toLowerCase(Locale.ROOT);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
package com.predic8.membrane.core.interceptor.dlp;

import com.predic8.membrane.annot.MCAttribute;
import com.predic8.membrane.annot.MCChildElement;
import com.predic8.membrane.annot.MCElement;
import com.predic8.membrane.core.exchange.Exchange;
import com.predic8.membrane.core.http.Message;
import com.predic8.membrane.core.interceptor.AbstractInterceptor;
import com.predic8.membrane.core.interceptor.Outcome;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

import static com.predic8.membrane.core.interceptor.Outcome.CONTINUE;

@MCElement(name = "dlp")
public class DLPInterceptor extends AbstractInterceptor {

private static final Logger log = LoggerFactory.getLogger(DLPInterceptor.class);
private DLPAnalyzer dlpAnalyzer;
private String fieldsConfig;
private List<Mask> masks = new ArrayList<>();
private List<Filter> filters = new ArrayList<>();
private List<Report> reports = new ArrayList<>();

/**
 * Creates the analyzer before delegating to the superclass initialisation.
 * When a fields configuration is set, its mappings are loaded through
 * {@link CsvFieldConfiguration}; otherwise an empty mapping is used.
 */
@Override
public void init() {
    java.util.Map<String, String> riskLevels = fieldsConfig == null
            ? java.util.Map.of()
            : new CsvFieldConfiguration().getFields(fieldsConfig);
    dlpAnalyzer = new DLPAnalyzer(riskLevels);
    super.init();
}

/**
 * Runs the DLP handling on the request of the exchange.
 *
 * @param exc the current exchange
 * @return the outcome produced by {@code handleInternal}
 */
@Override
public Outcome handleRequest(Exchange exc) {
    Message request = exc.getRequest();
    return handleInternal(request);
}

/**
 * Runs the DLP handling on the response of the exchange.
 *
 * @param exc the current exchange
 * @return the outcome produced by {@code handleInternal}
 */
@Override
public Outcome handleResponse(Exchange exc) {
    Message response = exc.getResponse();
    return handleInternal(response);
}

/**
 * Logs a risk analysis of the message and then rewrites its body by applying,
 * in this order, all configured masks, all filters and all reports. After each
 * step the body is replaced with the UTF-8 bytes of that step's result.
 *
 * @param msg the message to analyse and transform
 * @return {@code CONTINUE} when every step succeeded, {@code ABORT} if any
 *         exception was thrown (the exception is logged)
 */
public Outcome handleInternal(Message msg) {
    try {
        log.info("DLP Risk Analysis: {}", dlpAnalyzer.analyze(msg).getLogReport());

        // Iterating an empty list is a no-op, so no explicit emptiness checks are needed.
        for (Mask mask : masks) {
            byte[] masked = mask.apply(msg.getBodyAsStringDecoded())
                    .getBytes(StandardCharsets.UTF_8);
            msg.setBodyContent(masked);
        }

        for (Filter filter : filters) {
            byte[] filtered = filter.apply(msg.getBodyAsStringDecoded())
                    .getBytes(StandardCharsets.UTF_8);
            msg.setBodyContent(filtered);
        }

        for (Report report : reports) {
            // The body is read first, then the current message is analysed again for this report.
            String currentBody = msg.getBodyAsStringDecoded();
            RiskReport risk = dlpAnalyzer.analyze(msg);
            msg.setBodyContent(report.apply(currentBody, risk).getBytes(StandardCharsets.UTF_8));
        }

        return CONTINUE;
    } catch (Exception e) {
        log.error("Exception in DLPInterceptor handleInternal: ", e);
        return Outcome.ABORT;
    }
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Optimize analyzer usage and add input validation.

The method has performance and validation issues:

  1. The analyzer is called multiple times (lines 51 and 69)
  2. No validation of message content before processing
  3. Each transformation re-encodes the body which is inefficient
 public Outcome handleInternal(Message msg) {
+    if (msg == null || msg.getBodyAsStringDecoded() == null || msg.getBodyAsStringDecoded().isEmpty()) {
+        return CONTINUE;
+    }
+    
     try {
-        log.info("DLP Risk Analysis: {}", dlpAnalyzer.analyze(msg).getLogReport());
+        RiskReport riskReport = dlpAnalyzer.analyze(msg);
+        log.info("DLP Risk Analysis: {}", riskReport.getLogReport());
+        
+        String body = msg.getBodyAsStringDecoded();

         if (!masks.isEmpty()) {
             for (Mask mask : masks) {
-                msg.setBodyContent(mask.apply(msg.getBodyAsStringDecoded())
-                        .getBytes(StandardCharsets.UTF_8));
+                body = mask.apply(body);
             }
         }

         if (!filters.isEmpty()) {
             for (Filter filter : filters) {
-                msg.setBodyContent(filter.apply(msg.getBodyAsStringDecoded())
-                        .getBytes(StandardCharsets.UTF_8));
+                body = filter.apply(body);
             }
         }

         if (!reports.isEmpty()) {
             for (Report report : reports) {
-                msg.setBodyContent(report.apply(msg.getBodyAsStringDecoded(), dlpAnalyzer.analyze(msg)).getBytes(StandardCharsets.UTF_8));
+                body = report.apply(body, riskReport);
             }
         }
+        
+        msg.setBodyContent(body.getBytes(StandardCharsets.UTF_8));

         return CONTINUE;
     } catch (Exception e) {
         log.error("Exception in DLPInterceptor handleInternal: ", e);
         return Outcome.ABORT;
     }
 }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
public Outcome handleInternal(Message msg) {
try {
log.info("DLP Risk Analysis: {}", dlpAnalyzer.analyze(msg).getLogReport());
if (!masks.isEmpty()) {
for (Mask mask : masks) {
msg.setBodyContent(mask.apply(msg.getBodyAsStringDecoded())
.getBytes(StandardCharsets.UTF_8));
}
}
if (!filters.isEmpty()) {
for (Filter filter : filters) {
msg.setBodyContent(filter.apply(msg.getBodyAsStringDecoded())
.getBytes(StandardCharsets.UTF_8));
}
}
if (!reports.isEmpty()) {
for (Report report : reports) {
msg.setBodyContent(report.apply(msg.getBodyAsStringDecoded(), dlpAnalyzer.analyze(msg)).getBytes(StandardCharsets.UTF_8));
}
}
return CONTINUE;
} catch (Exception e) {
log.error("Exception in DLPInterceptor handleInternal: ", e);
return Outcome.ABORT;
}
}
public Outcome handleInternal(Message msg) {
+ // input validation
+ if (msg == null || msg.getBodyAsStringDecoded() == null || msg.getBodyAsStringDecoded().isEmpty()) {
+ return CONTINUE;
+ }
try {
- log.info("DLP Risk Analysis: {}", dlpAnalyzer.analyze(msg).getLogReport());
+ // analyze once
+ RiskReport riskReport = dlpAnalyzer.analyze(msg);
+ log.info("DLP Risk Analysis: {}", riskReport.getLogReport());
+
+ // work off a single mutable body string
+ String body = msg.getBodyAsStringDecoded();
if (!masks.isEmpty()) {
for (Mask mask : masks) {
- msg.setBodyContent(mask.apply(msg.getBodyAsStringDecoded())
- .getBytes(StandardCharsets.UTF_8));
+ body = mask.apply(body);
}
}
if (!filters.isEmpty()) {
for (Filter filter : filters) {
- msg.setBodyContent(filter.apply(msg.getBodyAsStringDecoded())
- .getBytes(StandardCharsets.UTF_8));
+ body = filter.apply(body);
}
}
if (!reports.isEmpty()) {
for (Report report : reports) {
- msg.setBodyContent(report.apply(msg.getBodyAsStringDecoded(), dlpAnalyzer.analyze(msg)).getBytes(StandardCharsets.UTF_8));
+ body = report.apply(body, riskReport);
}
}
+
+ // apply all transformations at once
+ msg.setBodyContent(body.getBytes(StandardCharsets.UTF_8));
return CONTINUE;
} catch (Exception e) {
log.error("Exception in DLPInterceptor handleInternal: ", e);
return Outcome.ABORT;
}
}
🤖 Prompt for AI Agents
In
core/src/main/java/com/predic8/membrane/core/interceptor/dlp/DLPInterceptor.java
between lines 49 and 78, optimize by calling dlpAnalyzer.analyze(msg) once and
reusing its result to avoid redundant processing. Add validation to check if the
message body is non-null and non-empty before applying masks, filters, or
reports. Instead of repeatedly decoding and re-encoding the message body for
each transformation, decode once, apply all transformations sequentially on the
decoded string, then encode and set the body content once at the end.



/**
 * Returns the configured fields configuration name.
 *
 * @return the value passed to {@code setFieldsConfig}, or {@code null} if unset
 */
public String getFieldsConfig() {
    return this.fieldsConfig;
}

/**
 * Sets the name of the fields configuration. In {@code init()} a non-null value
 * is handed to {@link CsvFieldConfiguration#getFields(String)}.
 *
 * @param fieldsConfig name of the configuration resource
 */
@MCAttribute
public void setFieldsConfig(String fieldsConfig) {
    this.fieldsConfig = fieldsConfig;
}

/**
 * Returns the configured masks.
 *
 * @return the internal list itself (not a copy)
 */
public List<Mask> getMasks() {
    return this.masks;
}

/**
 * Replaces the list of masks applied to message bodies.
 *
 * @param masks the masks to use; stored by reference
 * @return this interceptor, to allow call chaining
 */
@MCChildElement
public DLPInterceptor setMasks(List<Mask> masks) {
    this.masks = masks;
    return this;
}

/**
 * Returns the configured filters.
 *
 * @return the internal list itself (not a copy)
 */
public List<Filter> getFilters() {
    return this.filters;
}

/**
 * Replaces the list of filters applied to message bodies.
 *
 * @param filters the filters to use; stored by reference
 * @return this interceptor, to allow call chaining
 */
@MCChildElement(order = 1)
public DLPInterceptor setFilters(List<Filter> filters) {
    this.filters = filters;
    return this;
}

/**
 * Returns the configured reports.
 *
 * @return the internal list itself (not a copy)
 */
public List<Report> getReports() {
    return this.reports;
}

/**
 * Replaces the list of reports applied to message bodies.
 *
 * @param reports the reports to use; stored by reference
 */
@MCChildElement(order = 2)
public void setReports(List<Report> reports) {
    this.reports = reports;
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package com.predic8.membrane.core.interceptor.dlp;

import com.predic8.membrane.annot.MCAttribute;
import com.predic8.membrane.annot.MCElement;

/**
 * Configuration element {@code field} holding a single {@code jsonpath} attribute.
 */
@MCElement(name = "field")
public class Field {

    private String jsonpath;

    /**
     * Stores the JSONPath expression as given, without validation.
     *
     * @param jsonpath the expression to store
     */
    @MCAttribute
    public void setJsonpath(String jsonpath) {
        this.jsonpath = jsonpath;
    }

    /**
     * @return the stored expression, or {@code null} if none was set
     */
    public String getJsonpath() {
        return this.jsonpath;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package com.predic8.membrane.core.interceptor.dlp;

import com.predic8.membrane.annot.MCAttribute;
import com.predic8.membrane.annot.MCElement;
import com.predic8.membrane.core.http.Message;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Locale;
import java.util.Objects;
import java.util.regex.Pattern;

@MCElement(name = "field")
public class Field {

private static final Logger log = LoggerFactory.getLogger(Field.class);

private String name;
private Action action = Action.REPORT;
private Pattern compiled = Pattern.compile(".*");

/**
 * Sets the field name and compiles it as a case-insensitive regular expression.
 *
 * @param name the field name, interpreted as a regex pattern
 * @throws NullPointerException if {@code name} is {@code null}
 */
@MCAttribute
public void setName(String name) {
    Objects.requireNonNull(name, "field name must not be null");
    this.name = name;
    this.compiled = Pattern.compile(name, Pattern.CASE_INSENSITIVE);
}

/**
 * Sets the action from its textual name, compared case-insensitively against
 * the constants of the {@code Action} enum.
 *
 * @param action name of the action
 * @throws NullPointerException     if {@code action} is {@code null}
 * @throws IllegalArgumentException if {@code action} does not name a known
 *                                  action; the message lists the valid values
 */
@MCAttribute
public void setAction(String action) {
    Objects.requireNonNull(action, "action must not be null");
    try {
        this.action = Action.valueOf(action.toUpperCase(Locale.ROOT));
    } catch (IllegalArgumentException e) {
        // Same exception type as before, but with a message that names the accepted values.
        throw new IllegalArgumentException(
                "Invalid action: " + action + ". Valid actions are: "
                        + java.util.Arrays.toString(Action.values()), e);
    }
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Validate action string before conversion.

The method doesn't validate if the provided action string is a valid enum value before conversion, which could lead to IllegalArgumentException.

 @MCAttribute
 public void setAction(String action) {
+    Objects.requireNonNull(action, "action must not be null");
+    try {
         this.action = Action.valueOf(action.toUpperCase(Locale.ROOT));
+    } catch (IllegalArgumentException e) {
+        throw new IllegalArgumentException("Invalid action: " + action + ". Valid actions are: " + 
+            String.join(", ", Arrays.stream(Action.values()).map(Enum::name).toArray(String[]::new)), e);
+    }
 }

You'll need to add this import:

+import java.util.Arrays;
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
public void setAction(String action) {
this.action = Action.valueOf(action.toUpperCase(Locale.ROOT));
}
import java.util.Arrays;
@MCAttribute
public void setAction(String action) {
Objects.requireNonNull(action, "action must not be null");
try {
this.action = Action.valueOf(action.toUpperCase(Locale.ROOT));
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException(
"Invalid action: " + action +
". Valid actions are: " +
String.join(", ",
Arrays.stream(Action.values())
.map(Enum::name)
.toArray(String[]::new)
),
e
);
}
}
🤖 Prompt for AI Agents
In core/src/main/java/com/predic8/membrane/core/interceptor/dlp/Field.txt around
lines 29 to 31, the setAction method converts the input string to an enum
without validation, risking IllegalArgumentException. To fix this, validate the
input string against the Action enum values before conversion. If the string is
invalid, handle it gracefully by throwing a custom exception or logging an
error. Also, add the necessary import for Locale if not already present.


/**
 * @return the field name as passed to {@code setName}, or {@code null} if unset
 */
public String getName() {
    return this.name;
}

/**
 * @return the name of the current action in lower case
 */
public String getAction() {
    String constantName = action.name();
    return constantName.toLowerCase(Locale.ROOT);
}

/**
 * Resolves the strategy for the configured action and applies it to the
 * message using the compiled field pattern. Failures are logged and not
 * propagated, so a failing action does not interrupt the caller.
 *
 * @param msg the message to process
 */
public void handleAction(Message msg) {
    try {
        FieldActionStrategy.of(action.name().toLowerCase(Locale.ROOT)).apply(msg, compiled);
    } catch (Exception e) {
        // Pass the exception as the trailing argument without its own placeholder so
        // the logger records it as the throwable and no unfilled "{}" is left in the message.
        log.error("DLP field action '{}' failed", name, e);
    }
}

/**
 * Actions that can be configured for a field. The lower-cased constant name is
 * what {@code handleAction} passes to {@code FieldActionStrategy.of}.
 */
private enum Action {
    MASK,
    FILTER,
    REPORT
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package com.predic8.membrane.core.interceptor.dlp;

import com.predic8.membrane.core.http.Message;

import java.util.Locale;
import java.util.regex.Pattern;

/**
 * Strategy applied to a message for fields matching a pattern, together with
 * a factory that selects the implementation by action name.
 */
public interface FieldActionStrategy {

    /**
     * Applies this strategy to the message.
     *
     * @param msg     the message to process
     * @param pattern the compiled field pattern
     */
    void apply(Message msg, Pattern pattern);

    /**
     * Creates a new strategy instance for the given action name. The name is
     * matched case-insensitively against "mask", "filter" and "report".
     *
     * @param action the action name
     * @return a new strategy for that action
     * @throws IllegalArgumentException if the name is not one of the known actions
     */
    static FieldActionStrategy of(String action) {
        String key = action.toLowerCase(Locale.ROOT);
        switch (key) {
            case "mask":
                return new MaskField();
            case "filter":
                return new FilterField();
            case "report":
                return new ReportField();
            default:
                throw new IllegalArgumentException("Unknown action: " + action);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package com.predic8.membrane.core.interceptor.dlp;

import java.util.Map;

/**
 * Source of field mappings identified by a file name.
 */
public interface FieldConfiguration {
/**
 * Returns the mappings found in the named source.
 *
 * NOTE(review): the visible CSV-based implementation maps lower-cased field
 * names to risk levels; whether other implementations must follow the same
 * key/value convention is not stated here — confirm.
 *
 * @param fileName name of the source to read
 * @return map of field key to associated value
 */
Map<String, String> getFields(String fileName);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package com.predic8.membrane.core.interceptor.dlp;

import com.predic8.membrane.annot.MCChildElement;
import com.predic8.membrane.annot.MCElement;

import java.util.ArrayList;
import java.util.List;

/**
 * Configuration element {@code fields}: a container for {@link Field} child elements.
 */
@MCElement(name = "fields")
public class Fields {

    private List<Field> fields = new ArrayList<>();

    /**
     * @return the internal list of fields (not a copy); empty until fields are set
     */
    public List<Field> getFields() {
        return this.fields;
    }

    /**
     * Replaces the list of fields.
     *
     * @param fields the fields to use; stored by reference
     * @return this instance, to allow call chaining
     */
    @MCChildElement
    public Fields setFields(List<Field> fields) {
        this.fields = fields;
        return this;
    }
}
Loading