@@ -157,15 +157,78 @@ def trim_version_report(version_report, reduced_path_set):
     reduced_aggregate_bom_view_entries = [e for e in aggregate_bom_view_entries if f"{e['producerProject']['id']}:{e['producerReleases'][0]['id']}" in deduplicated]
     version_report['aggregateBomViewEntries'] = reduced_aggregate_bom_view_entries
 
-def write_output_file(version_report, output_file):
+'''
+
+CSV output details
+
+component name = aggregateBomViewEntries[].producerProject.name
+version name = aggregateBomViewEntries[].producerReleases[0].version
+license = licenses[].licenseDisplay
+file path = extract from detailedFileBomViewEntries
+match type = aggregateBomViewEntries[].matchTypes
+review status = aggregateBomViewEntries[].reviewSummary.reviewStatus
+
+'''
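+# Illustrative example (hypothetical license string): a first license entry of
+# "(Apache-2.0 AND MIT)" comes out of get_csv_data() below as the single CSV
+# cell "Apache-2.0;MIT" -- the parentheses and " AND " separators are stripped.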
+def get_csv_fieldnames():
+    return ['component name', 'version name', 'license', 'match type', 'review status']
+
+def get_csv_data(version_report, keep_dupes):
+    csv_data = list()
+    components = list()
+    for bom_view_entry in version_report['aggregateBomViewEntries']:
+        entry = dict()
+        entry['component name'] = bom_view_entry['producerProject']['name']
+        entry['version name'] = bom_view_entry['producerReleases'][0]['version']
+        entry['license'] = bom_view_entry['licenses'][0]['licenseDisplay'].replace(' AND ', ';').replace('(', '').replace(')', '')
+        pid = bom_view_entry['producerProject']['id']
+        vid = bom_view_entry['producerReleases'][0]['id']
+        #path_list = [p['path'] for p in version_report['detailedFileBomViewEntries'] if p['projectId'] == pid and p['versionId'] == vid]
+        #entry['file path'] = ';'.join(path_list)
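+        # Note: the 'file path' column from the docstring above is not emitted
+        # while this extraction stays commented out; get_csv_fieldnames() omits
+        # it, and extrasaction='ignore' in the writer would drop it anyway.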
+        entry['match type'] = ';'.join(bom_view_entry['matchTypes'])
+        entry['review status'] = bom_view_entry['reviewSummary']['reviewStatus']
+
+        # Only add if this component was not previously added.
+        composite_key = pid + vid
+        if composite_key not in components:
+            csv_data.append(entry)
+            components.append(composite_key)
+    if keep_dupes:
+        return csv_data
+    else:
+        return remove_duplicates(csv_data)
+
+def remove_duplicates(data):
+    # Put rows into buckets by version
+    buckets = dict()
+    for row in data:
+        version = row['version name']
+        if version not in buckets:
+            buckets[version] = [row]
+        else:
+            buckets[version].append(row)
+    # Reduce each bucket: drop any row whose component name starts with an
+    # earlier row's component name. Name comparison ignores case.
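+    # e.g. (hypothetical names) "jackson", "jackson-core" and "Jackson-Databind"
+    # sharing one version sort together and collapse to the single "jackson" row.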
+    for bucket in buckets.values():
+        bucket.sort(key=lambda d: d['component name'].lower())
+        for row in bucket:
+            index = bucket.index(row)
+            name = row['component name'].lower()
+            while index + 1 < len(bucket) and bucket[index + 1]['component name'].lower().startswith(name):
+                bucket.pop(index + 1)
+    reduced_data = list()
+    for b in buckets.values():
+        reduced_data.extend(b)
+    return reduced_data
+
+def write_output_file(version_report, output_file, keep_dupes):
     if output_file.lower().endswith(".csv"):
         logging.info(f"Writing CSV output into {output_file}")
-        field_names = list(version_report['aggregateBomViewEntries'][0].keys())
+        field_names = get_csv_fieldnames()
         with open(output_file, "w") as f:
-            writer = csv.DictWriter(f, fieldnames=field_names, extrasaction='ignore') # TODO
+            writer = csv.DictWriter(f, fieldnames=field_names, extrasaction='ignore', quoting=csv.QUOTE_ALL) # TODO
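+            # QUOTE_ALL wraps every field in quotes, a defensive choice since
+            # the license and match type cells hold ';'-joined lists.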
             writer.writeheader()
-            writer.writerows(version_report['aggregateBomViewEntries'])
-
+            writer.writerows(get_csv_data(version_report, keep_dupes))
         return
     # If it's neither, then .json
     if not output_file.lower().endswith(".json"):
@@ -183,6 +246,7 @@ def parse_command_args():
     parser.add_argument("-pn", "--project-name", required=True, help="Project Name")
     parser.add_argument("-pv", "--project-version-name", required=True, help="Project Version Name")
     parser.add_argument("-o", "--output-file", required=False, help="File name to write output. The file extension determines the format: .json or .csv; json is the default.")
+    parser.add_argument("-kd", "--keep-dupes", action='store_true', help="Do not reduce CSV data by fuzzy-matching component names")
     parser.add_argument("-kh", "--keep_hierarchy", action='store_true', help="Set to keep all entries in the sources report. Will not remove components found under others.")
     parser.add_argument("--report-retries", metavar="", type=int, default=RETRY_LIMIT, help="Retries for receiving the generated BlackDuck report. Generating the copyright report can take several minutes.")
     parser.add_argument("--report-timeout", metavar="", type=int, default=RETRY_TIMER, help="Wait time between subsequent download attempts.")
@@ -230,7 +294,7 @@ def main():
         trim_version_report(version_report, reduced_path_set)
         logging.info(f"Truncated dataset contains {len(version_report['aggregateBomViewEntries'])} bom entries and {len(version_report['detailedFileBomViewEntries'])} file view entries")
 
-    write_output_file(version_report, output_file)
+    write_output_file(version_report, output_file, args.keep_dupes)
 
     # Combine component data with selected file data
     # Output result with CSV and JSON as options.