Skip to content

Commit 761f8c2

Browse files
nahsragilday
authored andcommitted
try to redo sensitive log detection with gpt-4o
1 parent 7172e20 commit 761f8c2

File tree

2 files changed

+177
-25
lines changed

2 files changed

+177
-25
lines changed
Lines changed: 176 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,205 @@
11
package io.codemodder.codemods;
22

3-
import static io.codemodder.CodemodResources.getClassResourceAsString;
4-
3+
import com.contrastsecurity.sarif.PhysicalLocation;
54
import com.contrastsecurity.sarif.Result;
6-
import com.github.difflib.patch.AbstractDelta;
7-
import com.github.difflib.patch.DeleteDelta;
8-
import com.github.difflib.patch.Patch;
5+
import com.contrastsecurity.sarif.Run;
6+
import com.fasterxml.jackson.annotation.JsonProperty;
7+
import com.fasterxml.jackson.databind.ObjectMapper;
8+
import com.github.javaparser.StaticJavaParser;
9+
import com.github.javaparser.ast.CompilationUnit;
10+
import com.github.javaparser.ast.stmt.Statement;
11+
import com.theokanning.openai.completion.chat.*;
912
import io.codemodder.*;
13+
import io.codemodder.javaparser.JavaParserChanger;
1014
import io.codemodder.plugins.llm.OpenAIService;
11-
import io.codemodder.plugins.llm.SarifToLLMForBinaryVerificationAndFixingCodemod;
1215
import io.codemodder.providers.sarif.semgrep.SemgrepScan;
16+
import java.io.IOException;
17+
import java.io.UncheckedIOException;
18+
import java.nio.file.Files;
19+
import java.util.ArrayList;
1320
import java.util.List;
21+
import java.util.Objects;
22+
import java.util.Optional;
1423
import javax.inject.Inject;
1524

1625
/** A codemod that removes any sensitive data being logged. */
1726
@Codemod(
1827
id = "pixee:java/sensitive-data-logging",
1928
importance = Importance.HIGH,
2029
reviewGuidance = ReviewGuidance.MERGE_AFTER_REVIEW)
21-
public final class SensitiveDataLoggingCodemod
22-
extends SarifToLLMForBinaryVerificationAndFixingCodemod {
30+
public final class SensitiveDataLoggingCodemod extends JavaParserChanger {
31+
32+
private final RuleSarif sarif;
33+
private final OpenAIService service;
2334

2435
@Inject
2536
public SensitiveDataLoggingCodemod(
2637
@SemgrepScan(ruleId = "sensitive-data-logging") final RuleSarif sarif,
27-
final OpenAIService openAI) {
28-
super(sarif, openAI);
38+
final OpenAIService openAIService) {
39+
this.sarif = sarif;
40+
this.service = Objects.requireNonNull(openAIService);
2941
}
3042

3143
@Override
32-
protected String getThreatPrompt(
33-
final CodemodInvocationContext context, final List<Result> results) {
34-
return getClassResourceAsString(getClass(), "threat_prompt.txt");
44+
public CodemodFileScanningResult visit(
45+
final CodemodInvocationContext context, final CompilationUnit cu) {
46+
47+
List<Result> results = sarif.getResultsByLocationPath(context.path());
48+
49+
String rawFile;
50+
try {
51+
rawFile = Files.readString(context.path());
52+
} catch (IOException e) {
53+
throw new UncheckedIOException("Couldn't read file", e);
54+
}
55+
56+
List<String> rawLines = rawFile.lines().toList();
57+
58+
List<CodemodChange> changes = new ArrayList<>();
59+
for (Result result : results) {
60+
PhysicalLocation physicalLocation = result.getLocations().get(0).getPhysicalLocation();
61+
Integer startLine = physicalLocation.getRegion().getStartLine();
62+
Optional<Statement> statement = getSingleStatement(cu, startLine);
63+
64+
if (statement.isPresent()) {
65+
SensitivityAndFixAnalysis analysis;
66+
try {
67+
analysis = performSensitivityAnalysis(rawLines, startLine);
68+
} catch (IOException e) {
69+
throw new UncheckedIOException("Couldn't perform sensitivity analysis", e);
70+
}
71+
if (analysis.isSensitiveAndDirectlyLogged()) {
72+
String newStatement = analysis.newStatement();
73+
if (newStatement != null && !newStatement.isBlank()) {
74+
Statement newStmt = StaticJavaParser.parseStatement(newStatement);
75+
statement.get().replace(newStmt);
76+
77+
String analysisText = analysis.isSensitiveAnalysisText();
78+
CodemodChange change = CodemodChange.from(startLine, analysisText);
79+
changes.add(change);
80+
}
81+
}
82+
}
83+
}
84+
return CodemodFileScanningResult.from(changes, List.of());
3585
}
3686

37-
@Override
38-
protected String getFixPrompt() {
39-
return getClassResourceAsString(getClass(), "fix_prompt.txt");
87+
private SensitivityAndFixAnalysis performSensitivityAnalysis(
88+
final List<String> lines, final Integer startLine) throws IOException {
89+
String codeSnippet = numberedContextWithExtraLines(lines, startLine, 10, 10);
90+
String prompt =
91+
"""
92+
A tool has cited line %d of the code for possibly logging sensitive data:
93+
94+
%s
95+
96+
Respond ONLY in the form of JSON with the following keys, in this order:
97+
98+
sensitive_analysis_text: a careful, thorough analysis of whether the data is sensitive (specifically a password, session ID, security token, SSN, etc -- not a username)
99+
is_data_directly_logged: a careful, thorough analysis of whether the data is definitely and directly logged (e.g., not just passed to another method inside to the scope, unless that's a method that obviously returns the given string)
100+
is_it_sensitive_and_directly_logged: a boolean dictating whether it is sensitive and definitely and directly logged
101+
new_line_to_replace: if sensitive and directly logged, the statement on line %d that should replace it -- remember to correctly JSON escape this value
102+
"""
103+
.formatted(startLine, codeSnippet, startLine);
104+
105+
ChatCompletionRequest request =
106+
ChatCompletionRequest.builder()
107+
.temperature(0D)
108+
.model("gpt-4o-2024-05-13")
109+
// .model("gpt-4-0613")
110+
.n(1)
111+
.messages(List.of(new ChatMessage(ChatMessageRole.USER.value(), prompt)))
112+
.build();
113+
ChatCompletionResult completion = service.createChatCompletion(request);
114+
ChatCompletionChoice response = completion.getChoices().get(0);
115+
String responseText = response.getMessage().getContent();
116+
if (responseText.startsWith("```json") && responseText.endsWith("```")) {
117+
responseText = responseText.substring(7, responseText.length() - 3);
118+
}
119+
return new ObjectMapper().readValue(responseText, SensitivityAndFixAnalysisDTO.class);
40120
}
41121

42-
@Override
43-
protected boolean isPatchExpected(Patch<String> patch) {
44-
// This codemod should only delete lines.
45-
for (AbstractDelta<String> delta : patch.getDeltas()) {
46-
if (!(delta instanceof DeleteDelta<String>)) {
47-
return false;
48-
}
122+
/**
123+
* We can fix if there's only one statement on the given line (meaning, it may span multiple
124+
* lines, but only one statement is started on the lin.
125+
*/
126+
private Optional<Statement> getSingleStatement(final CompilationUnit cu, final Integer line) {
127+
List<Statement> statements =
128+
cu.findAll(Statement.class).stream()
129+
.filter(s -> s.getRange().isPresent())
130+
.filter(s -> s.getRange().get().begin.line == line)
131+
.toList();
132+
if (statements.size() == 1) {
133+
return Optional.of(statements.get(0));
134+
}
135+
return Optional.empty();
136+
}
137+
138+
/** The results of the sensitivity analysis and, optionally, the fix to apply. */
139+
private interface SensitivityAndFixAnalysis {
140+
141+
/**
142+
* A detailed analysis of whether the data is sensitive, like a password, security token, etc.
143+
* and its directly logged.
144+
*/
145+
String isSensitiveAnalysisText();
146+
147+
/** Whether the statement logs sensitive data. */
148+
boolean isSensitiveAndDirectlyLogged();
149+
150+
/** The new statement with which to replace the old. */
151+
String newStatement();
152+
}
153+
154+
private static class SensitivityAndFixAnalysisDTO implements SensitivityAndFixAnalysis {
155+
@JsonProperty("sensitive_analysis_text")
156+
private String sensitiveAnalysisText;
157+
158+
@JsonProperty("is_data_directly_logged")
159+
private String isDataDirectlyLogged;
160+
161+
@JsonProperty("is_it_sensitive_and_directly_logged")
162+
private boolean isSensitiveAndDirectlyLogged;
163+
164+
@JsonProperty("new_line_to_replace")
165+
private String newLineToReplace;
166+
167+
@Override
168+
public String isSensitiveAnalysisText() {
169+
return sensitiveAnalysisText;
170+
}
171+
172+
@Override
173+
public boolean isSensitiveAndDirectlyLogged() {
174+
return isSensitiveAndDirectlyLogged;
175+
}
176+
177+
@Override
178+
public String newStatement() {
179+
return newLineToReplace;
49180
}
181+
}
50182

51-
return true;
183+
private static String numberedContextWithExtraLines(
184+
final List<String> lines,
185+
final int line,
186+
final int extraLinesBefore,
187+
final int extraLinesAfter) {
188+
StringBuilder sb = new StringBuilder();
189+
int startLine = Math.max(0, line - extraLinesBefore);
190+
int endLine = Math.min(lines.size(), line + extraLinesAfter);
191+
for (int i = startLine; i < endLine; i++) {
192+
sb.append(i + 1);
193+
sb.append(": ");
194+
sb.append(lines.get(i));
195+
sb.append("\n");
196+
}
197+
return sb.toString();
198+
}
199+
200+
@Override
201+
public boolean shouldRun() {
202+
List<Run> runs = sarif.rawDocument().getRuns();
203+
return runs != null && !runs.isEmpty() && !runs.get(0).getResults().isEmpty();
52204
}
53205
}

core-codemods/src/test/java/io/codemodder/codemods/SensitiveDataLoggingCodemodTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ final class SensitiveDataLoggingCodemodTest implements LLMVerifyingCodemodTestMi
1616
@Override
1717
public String getRequirementsPrompt() {
1818
return """
19-
- Remove ALL lines that log sensitive data at INFO or higher severities.
19+
- The code no longer logs sensitive data (or it does so at DEBUG or below level).
2020
""";
2121
}
2222
}

0 commit comments

Comments
 (0)