55import com .contrastsecurity .sarif .Run ;
66import com .fasterxml .jackson .annotation .JsonProperty ;
77import com .fasterxml .jackson .databind .ObjectMapper ;
8+ import com .fasterxml .jackson .databind .ObjectReader ;
89import com .github .javaparser .StaticJavaParser ;
910import com .github .javaparser .ast .CompilationUnit ;
1011import com .github .javaparser .ast .stmt .Statement ;
1617import java .io .IOException ;
1718import java .io .UncheckedIOException ;
1819import java .nio .file .Files ;
20+ import java .nio .file .Path ;
1921import java .util .ArrayList ;
2022import java .util .List ;
2123import java .util .Objects ;
2224import java .util .Optional ;
25+ import java .util .concurrent .atomic .AtomicInteger ;
26+ import java .util .stream .Collectors ;
27+ import java .util .stream .Stream ;
2328import javax .inject .Inject ;
2429
2530/** A codemod that removes any sensitive data being logged. */
@@ -31,62 +36,55 @@ public final class SensitiveDataLoggingCodemod extends JavaParserChanger {
3136
3237 private final RuleSarif sarif ;
3338 private final OpenAIService service ;
39+ private final ObjectReader reader ;
3440
3541 @ Inject
3642 public SensitiveDataLoggingCodemod (
3743 @ SemgrepScan (ruleId = "sensitive-data-logging" ) final RuleSarif sarif ,
3844 final OpenAIService openAIService ) {
39- this .sarif = sarif ;
45+ this .sarif = Objects . requireNonNull ( sarif ) ;
4046 this .service = Objects .requireNonNull (openAIService );
47+ this .reader = new ObjectMapper ().readerFor (SensitivityAndFixAnalysisDTO .class );
4148 }
4249
4350 @ Override
4451 public CodemodFileScanningResult visit (
4552 final CodemodInvocationContext context , final CompilationUnit cu ) {
46-
47- List <Result > results = sarif .getResultsByLocationPath (context .path ());
48-
49- String rawFile ;
50- try {
51- rawFile = Files .readString (context .path ());
52- } catch (IOException e ) {
53- throw new UncheckedIOException ("Couldn't read file" , e );
54- }
55-
56- List <String > rawLines = rawFile .lines ().toList ();
57-
58- List <CodemodChange > changes = new ArrayList <>();
59- for (Result result : results ) {
53+ final var source = context .path ();
54+ final List <Result > results = sarif .getResultsByLocationPath (source );
55+ final List <CodemodChange > changes = new ArrayList <>();
56+ for (final Result result : results ) {
6057 PhysicalLocation physicalLocation = result .getLocations ().get (0 ).getPhysicalLocation ();
6158 Integer startLine = physicalLocation .getRegion ().getStartLine ();
6259 Optional <Statement > statement = getSingleStatement (cu , startLine );
60+ if (statement .isEmpty ()) {
61+ continue ;
62+ }
6363
64- if (statement .isPresent ()) {
65- SensitivityAndFixAnalysis analysis ;
66- try {
67- analysis = performSensitivityAnalysis (rawLines , startLine );
68- } catch (IOException e ) {
69- throw new UncheckedIOException ("Couldn't perform sensitivity analysis" , e );
70- }
71- if (analysis .isSensitiveAndDirectlyLogged ()) {
72- String newStatement = analysis .newStatement ();
73- if (newStatement != null && !newStatement .isBlank ()) {
74- Statement newStmt = StaticJavaParser .parseStatement (newStatement );
75- statement .get ().replace (newStmt );
76-
77- String analysisText = analysis .isSensitiveAnalysisText ();
78- CodemodChange change = CodemodChange .from (startLine , analysisText );
79- changes .add (change );
80- }
64+ SensitivityAndFixAnalysis analysis ;
65+ try {
66+ analysis = performSensitivityAnalysis (source , startLine );
67+ } catch (IOException e ) {
68+ throw new UncheckedIOException ("Couldn't perform sensitivity analysis" , e );
69+ }
70+ if (analysis .isSensitiveAndDirectlyLogged ()) {
71+ String newStatement = analysis .newStatement ();
72+ if (newStatement != null && !newStatement .isBlank ()) {
73+ Statement newStmt = StaticJavaParser .parseStatement (newStatement );
74+ statement .get ().replace (newStmt );
75+
76+ String analysisText = analysis .isSensitiveAnalysisText ();
77+ CodemodChange change = CodemodChange .from (startLine , analysisText );
78+ changes .add (change );
8179 }
8280 }
8381 }
8482 return CodemodFileScanningResult .from (changes , List .of ());
8583 }
8684
8785 private SensitivityAndFixAnalysis performSensitivityAnalysis (
88- final List < String > lines , final Integer startLine ) throws IOException {
89- String codeSnippet = numberedContextWithExtraLines (lines , startLine , 10 , 10 );
86+ final Path source , final Integer startLine ) throws IOException {
87+ String codeSnippet = numberedContextWithExtraLines (source , startLine );
9088 String prompt =
9189 """
9290 A tool has cited line %d of the code for possibly logging sensitive data:
@@ -106,33 +104,28 @@ private SensitivityAndFixAnalysis performSensitivityAnalysis(
106104 ChatCompletionRequest .builder ()
107105 .temperature (0D )
108106 .model ("gpt-4o-2024-05-13" )
109- // .model("gpt-4-0613")
110107 .n (1 )
111108 .messages (List .of (new ChatMessage (ChatMessageRole .USER .value (), prompt )))
112109 .build ();
113110 ChatCompletionResult completion = service .createChatCompletion (request );
114111 ChatCompletionChoice response = completion .getChoices ().get (0 );
115112 String responseText = response .getMessage ().getContent ();
116113 if (responseText .startsWith ("```json" ) && responseText .endsWith ("```" )) {
117- responseText = responseText .substring (7 , responseText .length () - 3 );
114+ responseText =
115+ responseText .substring ("```json" .length (), responseText .length () - "```" .length ());
118116 }
119- return new ObjectMapper () .readValue (responseText , SensitivityAndFixAnalysisDTO . class );
117+ return reader .readValue (responseText );
120118 }
121119
122120 /**
123121 * We can fix if there's only one statement on the given line (meaning, it may span multiple
124- * lines, but only one statement is started on the lin .
122+ * lines, but only one statement is started on the line) .
125123 */
126124 private Optional <Statement > getSingleStatement (final CompilationUnit cu , final Integer line ) {
127- List <Statement > statements =
128- cu .findAll (Statement .class ).stream ()
129- .filter (s -> s .getRange ().isPresent ())
130- .filter (s -> s .getRange ().get ().begin .line == line )
131- .toList ();
132- if (statements .size () == 1 ) {
133- return Optional .of (statements .get (0 ));
134- }
135- return Optional .empty ();
125+ return cu .findAll (Statement .class ).stream ()
126+ .filter (s -> s .getRange ().isPresent ())
127+ .filter (s -> s .getRange ().get ().begin .line == line )
128+ .findFirst ();
136129 }
137130
138131 /** The results of the sensitivity analysis and, optionally, the fix to apply. */
@@ -180,26 +173,28 @@ public String newStatement() {
180173 }
181174 }
182175
183- private static String numberedContextWithExtraLines (
184- final List <String > lines ,
185- final int line ,
186- final int extraLinesBefore ,
187- final int extraLinesAfter ) {
188- StringBuilder sb = new StringBuilder ();
189- int startLine = Math .max (0 , line - extraLinesBefore );
190- int endLine = Math .min (lines .size (), line + extraLinesAfter );
191- for (int i = startLine ; i < endLine ; i ++) {
192- sb .append (i + 1 );
193- sb .append (": " );
194- sb .append (lines .get (i ));
195- sb .append ("\n " );
176+ private static String numberedContextWithExtraLines (final Path path , final int line )
177+ throws IOException {
178+ int startLine = Math .max (0 , line - CONTEXT );
179+ try (final Stream <String > lines = Files .lines (path )) {
180+ final AtomicInteger counter = new AtomicInteger (startLine );
181+ return lines
182+ .skip (startLine )
183+ .limit (1L + CONTEXT )
184+ .map (s -> counter .incrementAndGet () + ": " + s )
185+ .collect (Collectors .joining ("\n " ));
196186 }
197- return sb .toString ();
198187 }
199188
200189 @ Override
201190 public boolean shouldRun () {
202191 List <Run > runs = sarif .rawDocument ().getRuns ();
203192 return runs != null && !runs .isEmpty () && !runs .get (0 ).getResults ().isEmpty ();
204193 }
194+
195+ /**
196+ * Number of lines of leading and trailing context surrounding each Semgrep finding to include in
197+ * the code snippet sent to OpenAI.
198+ */
199+ private static final int CONTEXT = 10 ;
205200}
0 commit comments