@@ -93,6 +93,76 @@ def resolve_artifact_location(
9393 return None
9494
9595
def transform_sarif_results(
    sarif_data: dict, base_path: Path, context_length: int, vulnerability_limit: int
) -> dict[tuple[Path, int, int, str], list[str]]:
    """Group SARIF result messages by the source-code context they refer to.

    Walks every ``runs[].results[].locations[]`` entry of ``sarif_data``,
    resolves the referenced file, reads it, and asks
    ``get_source_code_context`` for the surrounding context range. Locations
    that resolve to the same ``(uri, start, end, context)`` key share one
    list of message texts.

    Args:
        sarif_data: Parsed SARIF document. Missing ``runs``, ``artifacts``,
            ``results``, ``locations``, ``physicalLocation``, ``region`` and
            ``message`` entries are treated as empty.
        base_path: Directory the SARIF artifact URIs are resolved against.
        context_length: Forwarded unchanged to ``get_source_code_context``.
        vulnerability_limit: Stop after this many locations have been
            collected. A value of ``0`` or less disables the limit.

    Returns:
        A mapping from ``(uri, context_start, context_end, source_code_context)``
        to the message texts reported for that context. ``uri`` is whatever
        ``resolve_artifact_location`` returned (path-like, since
        ``relative_to`` is called on it); the bounds index into the file's
        list of lines and ``source_code_context`` is the joined slice.
    """
    grouped_messages = defaultdict(list)
    vulnerability_count = 0

    for run_idx, run in enumerate(sarif_data.get("runs", [])):
        # Kept positionally aligned with run["artifacts"]; the list is handed
        # to resolve_artifact_location together with each artifactLocation.
        artifact_locations = [
            parse_sarif_location(base_path, artifact["location"]["uri"]) for artifact in run.get("artifacts", [])
        ]

        for result_idx, result in enumerate(run.get("results", [])):
            for location_idx, location in enumerate(result.get("locations", [])):
                physical_location = location.get("physicalLocation", {})

                artifact_location = physical_location.get("artifactLocation", {})
                uri = resolve_artifact_location(base_path, artifact_location, artifact_locations)
                if uri is None:
                    # Logger.warn is a deprecated alias (removed in Python 3.13).
                    logger.warning(
                        f'Unable to find file for ".runs[{run_idx}].results[{result_idx}].locations[{location_idx}]"'
                    )
                    continue

                region = physical_location.get("region", {})
                start_line = region.get("startLine", 1)
                end_line = region.get("endLine", start_line)
                # startLine is shifted down by one so that, together with the
                # untouched endLine, the pair can be used as a slice over the
                # file's list of lines.
                start_line = start_line - 1

                # Path relative to base_path: used for log messages and passed
                # to get_source_code_context.
                file_path = str(uri.relative_to(base_path))

                logger.info(f"Extracting context for {file_path} at {start_line}:{end_line}")
                source_code_context = None
                try:
                    # Open the resolved location rather than the relative path,
                    # so the lookup honours base_path instead of silently
                    # depending on the process working directory.
                    with open_with_chardet(str(uri), "r") as file:
                        src = file.read()

                    source_lines = src.splitlines(keepends=True)
                    context_start, context_end = get_source_code_context(
                        file_path, source_lines, start_line, end_line, context_length
                    )
                    if context_start is not None and context_end is not None:
                        source_code_context = "".join(source_lines[context_start:context_end])
                except FileNotFoundError:
                    logger.info(f"File not found in the current working directory: {file_path}")

                if source_code_context is None:
                    logger.info(f"No context found for {file_path} at {start_line}:{end_line}")
                    continue

                # source_code_context is only set when both bounds are known,
                # so they can be used directly as part of the grouping key.
                grouped_messages[(uri, context_start, context_end, source_code_context)].append(
                    result.get("message", {}).get("text", "")
                )

                vulnerability_count += 1
                # Return (not break) so the limit ends the whole traversal,
                # not just the innermost loop.
                if 0 < vulnerability_limit <= vulnerability_count:
                    return grouped_messages

    return grouped_messages
164+
165+
96166class ExtractCode (Step ):
97167 required_keys = {"sarif_file_path" }
98168
@@ -112,7 +182,6 @@ def __init__(self, inputs: dict):
112182 self .vulnerability_limit = inputs .get ("vulnerability_limit" , 10 )
113183
114184 # Prepare for data extraction
115- self .extracted_data = []
116185 self .extracted_code_contexts = []
117186
118187 def run (self ) -> dict :
@@ -122,77 +191,8 @@ def run(self) -> dict:
122191
123192 vulnerability_count = 0
124193 base_path = Path .cwd ()
125- # Process each result in SARIF data
126- grouped_messages = defaultdict (list )
127- for run_idx , run in enumerate (sarif_data .get ("runs" , [])):
128- artifact_locations = [
129- parse_sarif_location (base_path , artifact ["location" ]["uri" ]) for artifact in run .get ("artifacts" , [])
130- ]
131-
132- for result_idx , result in enumerate (run .get ("results" , [])):
133- for location_idx , location in enumerate (result .get ("locations" , [])):
134- physical_location = location .get ("physicalLocation" , {})
135-
136- artifact_location = physical_location .get ("artifactLocation" , {})
137- uri = resolve_artifact_location (base_path , artifact_location , artifact_locations )
138- if uri is None :
139- logger .warn (
140- f'Unable to find file for ".runs[{ run_idx } ].results[{ result_idx } ].locations[{ location_idx } ]"'
141- )
142- continue
143-
144- region = physical_location .get ("region" , {})
145- start_line = region .get ("startLine" , 1 )
146- end_line = region .get ("endLine" , start_line )
147- start_line = start_line - 1
148-
149- # Generate file path assuming code is in the current working directory
150- file_path = str (uri .relative_to (base_path ))
151-
152- # Extract lines from the code file
153- logger .info (f"Extracting context for { file_path } at { start_line } :{ end_line } " )
154- try :
155- with open_with_chardet (file_path , "r" ) as file :
156- src = file .read ()
157-
158- source_lines = src .splitlines (keepends = True )
159- context_start , context_end = get_source_code_context (
160- file_path , source_lines , start_line , end_line , self .context_length
161- )
162-
163- source_code_context = None
164- if context_start is not None and context_end is not None :
165- source_code_context = "" .join (source_lines [context_start :context_end ])
166-
167- except FileNotFoundError :
168- context_start = None
169- context_end = None
170- source_code_context = None
171- logger .info (f"File not found in the current working directory: { file_path } " )
172-
173- if source_code_context is None :
174- logger .info (f"No context found for { file_path } at { start_line } :{ end_line } " )
175- continue
176-
177- start = context_start if context_start is not None else start_line
178- end = context_end if context_end is not None else end_line
179- self .extracted_data .append (
180- {
181- "affectedCode" : source_code_context ,
182- "startLine" : start ,
183- "endLine" : end ,
184- "uri" : file_path ,
185- "messageText" : result .get ("message" , {}).get ("text" , "" ),
186- }
187- )
188-
189- grouped_messages [(uri , start , end , source_code_context )].append (
190- result .get ("message" , {}).get ("text" , "" )
191- )
192194
193- vulnerability_count = vulnerability_count + 1
194- if 0 < self .vulnerability_limit <= vulnerability_count :
195- break
195+ grouped_messages = transform_sarif_results (sarif_data , base_path , self .context_length , self .vulnerability_limit )
196196
197197 self .extracted_code_contexts = [
198198 {
0 commit comments