|
1 | 1 | package io.codemodder.codemods; |
2 | 2 |
|
3 | | -import static io.codemodder.CodemodResources.getClassResourceAsString; |
4 | | - |
| 3 | +import com.contrastsecurity.sarif.PhysicalLocation; |
5 | 4 | import com.contrastsecurity.sarif.Result; |
6 | | -import com.github.difflib.patch.AbstractDelta; |
7 | | -import com.github.difflib.patch.DeleteDelta; |
8 | | -import com.github.difflib.patch.Patch; |
| 5 | +import com.contrastsecurity.sarif.Run; |
| 6 | +import com.fasterxml.jackson.annotation.JsonProperty; |
| 7 | +import com.fasterxml.jackson.databind.ObjectMapper; |
| 8 | +import com.github.javaparser.StaticJavaParser; |
| 9 | +import com.github.javaparser.ast.CompilationUnit; |
| 10 | +import com.github.javaparser.ast.stmt.Statement; |
| 11 | +import com.theokanning.openai.completion.chat.*; |
9 | 12 | import io.codemodder.*; |
| 13 | +import io.codemodder.javaparser.JavaParserChanger; |
10 | 14 | import io.codemodder.plugins.llm.OpenAIService; |
11 | | -import io.codemodder.plugins.llm.SarifToLLMForBinaryVerificationAndFixingCodemod; |
12 | 15 | import io.codemodder.providers.sarif.semgrep.SemgrepScan; |
| 16 | +import java.io.IOException; |
| 17 | +import java.io.UncheckedIOException; |
| 18 | +import java.nio.file.Files; |
| 19 | +import java.util.ArrayList; |
13 | 20 | import java.util.List; |
| 21 | +import java.util.Objects; |
| 22 | +import java.util.Optional; |
14 | 23 | import javax.inject.Inject; |
15 | 24 |
|
16 | 25 | /** A codemod that removes any sensitive data being logged. */ |
17 | 26 | @Codemod( |
18 | 27 | id = "pixee:java/sensitive-data-logging", |
19 | 28 | importance = Importance.HIGH, |
20 | 29 | reviewGuidance = ReviewGuidance.MERGE_AFTER_REVIEW) |
21 | | -public final class SensitiveDataLoggingCodemod |
22 | | - extends SarifToLLMForBinaryVerificationAndFixingCodemod { |
| 30 | +public final class SensitiveDataLoggingCodemod extends JavaParserChanger { |
| 31 | + |
| 32 | + private final RuleSarif sarif; |
| 33 | + private final OpenAIService service; |
23 | 34 |
|
24 | 35 | @Inject |
25 | 36 | public SensitiveDataLoggingCodemod( |
26 | 37 | @SemgrepScan(ruleId = "sensitive-data-logging") final RuleSarif sarif, |
27 | | - final OpenAIService openAI) { |
28 | | - super(sarif, openAI); |
| 38 | + final OpenAIService openAIService) { |
| 39 | + this.sarif = sarif; |
| 40 | + this.service = Objects.requireNonNull(openAIService); |
29 | 41 | } |
30 | 42 |
|
31 | 43 | @Override |
32 | | - protected String getThreatPrompt( |
33 | | - final CodemodInvocationContext context, final List<Result> results) { |
34 | | - return getClassResourceAsString(getClass(), "threat_prompt.txt"); |
| 44 | + public CodemodFileScanningResult visit( |
| 45 | + final CodemodInvocationContext context, final CompilationUnit cu) { |
| 46 | + |
| 47 | + List<Result> results = sarif.getResultsByLocationPath(context.path()); |
| 48 | + |
| 49 | + String rawFile; |
| 50 | + try { |
| 51 | + rawFile = Files.readString(context.path()); |
| 52 | + } catch (IOException e) { |
| 53 | + throw new UncheckedIOException("Couldn't read file", e); |
| 54 | + } |
| 55 | + |
| 56 | + List<String> rawLines = rawFile.lines().toList(); |
| 57 | + |
| 58 | + List<CodemodChange> changes = new ArrayList<>(); |
| 59 | + for (Result result : results) { |
| 60 | + PhysicalLocation physicalLocation = result.getLocations().get(0).getPhysicalLocation(); |
| 61 | + Integer startLine = physicalLocation.getRegion().getStartLine(); |
| 62 | + Optional<Statement> statement = getSingleStatement(cu, startLine); |
| 63 | + |
| 64 | + if (statement.isPresent()) { |
| 65 | + SensitivityAndFixAnalysis analysis; |
| 66 | + try { |
| 67 | + analysis = performSensitivityAnalysis(rawLines, startLine); |
| 68 | + } catch (IOException e) { |
| 69 | + throw new UncheckedIOException("Couldn't perform sensitivity analysis", e); |
| 70 | + } |
| 71 | + if (analysis.isSensitiveAndDirectlyLogged()) { |
| 72 | + String newStatement = analysis.newStatement(); |
| 73 | + if (newStatement != null && !newStatement.isBlank()) { |
| 74 | + Statement newStmt = StaticJavaParser.parseStatement(newStatement); |
| 75 | + statement.get().replace(newStmt); |
| 76 | + |
| 77 | + String analysisText = analysis.isSensitiveAnalysisText(); |
| 78 | + CodemodChange change = CodemodChange.from(startLine, analysisText); |
| 79 | + changes.add(change); |
| 80 | + } |
| 81 | + } |
| 82 | + } |
| 83 | + } |
| 84 | + return CodemodFileScanningResult.from(changes, List.of()); |
35 | 85 | } |
36 | 86 |
|
37 | | - @Override |
38 | | - protected String getFixPrompt() { |
39 | | - return getClassResourceAsString(getClass(), "fix_prompt.txt"); |
| 87 | + private SensitivityAndFixAnalysis performSensitivityAnalysis( |
| 88 | + final List<String> lines, final Integer startLine) throws IOException { |
| 89 | + String codeSnippet = numberedContextWithExtraLines(lines, startLine, 10, 10); |
| 90 | + String prompt = |
| 91 | + """ |
| 92 | + A tool has cited line %d of the code for possibly logging sensitive data: |
| 93 | +
|
| 94 | + %s |
| 95 | +
|
| 96 | + Respond ONLY in the form of JSON with the following keys, in this order: |
| 97 | +
|
| 98 | + sensitive_analysis_text: a careful, thorough analysis of whether the data is sensitive (specifically a password, session ID, security token, SSN, etc -- not a username) |
| 99 | + is_data_directly_logged: a careful, thorough analysis of whether the data is definitely and directly logged (e.g., not just passed to another method inside to the scope, unless that's a method that obviously returns the given string) |
| 100 | + is_it_sensitive_and_directly_logged: a boolean dictating whether it is sensitive and definitely and directly logged |
| 101 | + new_line_to_replace: if sensitive and directly logged, the statement on line %d that should replace it -- remember to correctly JSON escape this value |
| 102 | + """ |
| 103 | + .formatted(startLine, codeSnippet, startLine); |
| 104 | + |
| 105 | + ChatCompletionRequest request = |
| 106 | + ChatCompletionRequest.builder() |
| 107 | + .temperature(0D) |
| 108 | + .model("gpt-4o-2024-05-13") |
| 109 | + // .model("gpt-4-0613") |
| 110 | + .n(1) |
| 111 | + .messages(List.of(new ChatMessage(ChatMessageRole.USER.value(), prompt))) |
| 112 | + .build(); |
| 113 | + ChatCompletionResult completion = service.createChatCompletion(request); |
| 114 | + ChatCompletionChoice response = completion.getChoices().get(0); |
| 115 | + String responseText = response.getMessage().getContent(); |
| 116 | + if (responseText.startsWith("```json") && responseText.endsWith("```")) { |
| 117 | + responseText = responseText.substring(7, responseText.length() - 3); |
| 118 | + } |
| 119 | + return new ObjectMapper().readValue(responseText, SensitivityAndFixAnalysisDTO.class); |
40 | 120 | } |
41 | 121 |
|
42 | | - @Override |
43 | | - protected boolean isPatchExpected(Patch<String> patch) { |
44 | | - // This codemod should only delete lines. |
45 | | - for (AbstractDelta<String> delta : patch.getDeltas()) { |
46 | | - if (!(delta instanceof DeleteDelta<String>)) { |
47 | | - return false; |
48 | | - } |
| 122 | + /** |
| 123 | + * We can fix if there's only one statement on the given line (meaning, it may span multiple |
| 124 | + * lines, but only one statement is started on the lin. |
| 125 | + */ |
| 126 | + private Optional<Statement> getSingleStatement(final CompilationUnit cu, final Integer line) { |
| 127 | + List<Statement> statements = |
| 128 | + cu.findAll(Statement.class).stream() |
| 129 | + .filter(s -> s.getRange().isPresent()) |
| 130 | + .filter(s -> s.getRange().get().begin.line == line) |
| 131 | + .toList(); |
| 132 | + if (statements.size() == 1) { |
| 133 | + return Optional.of(statements.get(0)); |
| 134 | + } |
| 135 | + return Optional.empty(); |
| 136 | + } |
| 137 | + |
| 138 | + /** The results of the sensitivity analysis and, optionally, the fix to apply. */ |
| 139 | + private interface SensitivityAndFixAnalysis { |
| 140 | + |
| 141 | + /** |
| 142 | + * A detailed analysis of whether the data is sensitive, like a password, security token, etc. |
| 143 | + * and its directly logged. |
| 144 | + */ |
| 145 | + String isSensitiveAnalysisText(); |
| 146 | + |
| 147 | + /** Whether the statement logs sensitive data. */ |
| 148 | + boolean isSensitiveAndDirectlyLogged(); |
| 149 | + |
| 150 | + /** The new statement with which to replace the old. */ |
| 151 | + String newStatement(); |
| 152 | + } |
| 153 | + |
| 154 | + private static class SensitivityAndFixAnalysisDTO implements SensitivityAndFixAnalysis { |
| 155 | + @JsonProperty("sensitive_analysis_text") |
| 156 | + private String sensitiveAnalysisText; |
| 157 | + |
| 158 | + @JsonProperty("is_data_directly_logged") |
| 159 | + private String isDataDirectlyLogged; |
| 160 | + |
| 161 | + @JsonProperty("is_it_sensitive_and_directly_logged") |
| 162 | + private boolean isSensitiveAndDirectlyLogged; |
| 163 | + |
| 164 | + @JsonProperty("new_line_to_replace") |
| 165 | + private String newLineToReplace; |
| 166 | + |
| 167 | + @Override |
| 168 | + public String isSensitiveAnalysisText() { |
| 169 | + return sensitiveAnalysisText; |
| 170 | + } |
| 171 | + |
| 172 | + @Override |
| 173 | + public boolean isSensitiveAndDirectlyLogged() { |
| 174 | + return isSensitiveAndDirectlyLogged; |
| 175 | + } |
| 176 | + |
| 177 | + @Override |
| 178 | + public String newStatement() { |
| 179 | + return newLineToReplace; |
49 | 180 | } |
| 181 | + } |
50 | 182 |
|
51 | | - return true; |
| 183 | + private static String numberedContextWithExtraLines( |
| 184 | + final List<String> lines, |
| 185 | + final int line, |
| 186 | + final int extraLinesBefore, |
| 187 | + final int extraLinesAfter) { |
| 188 | + StringBuilder sb = new StringBuilder(); |
| 189 | + int startLine = Math.max(0, line - extraLinesBefore); |
| 190 | + int endLine = Math.min(lines.size(), line + extraLinesAfter); |
| 191 | + for (int i = startLine; i < endLine; i++) { |
| 192 | + sb.append(i + 1); |
| 193 | + sb.append(": "); |
| 194 | + sb.append(lines.get(i)); |
| 195 | + sb.append("\n"); |
| 196 | + } |
| 197 | + return sb.toString(); |
| 198 | + } |
| 199 | + |
| 200 | + @Override |
| 201 | + public boolean shouldRun() { |
| 202 | + List<Run> runs = sarif.rawDocument().getRuns(); |
| 203 | + return runs != null && !runs.isEmpty() && !runs.get(0).getResults().isEmpty(); |
52 | 204 | } |
53 | 205 | } |
0 commit comments