|
| 1 | +#!/usr/bin/env python |
| 2 | + |
| 3 | +import argparse |
| 4 | +import json |
| 5 | +import logging |
| 6 | +import math |
| 7 | +import sys |
| 8 | +from pathlib import Path |
| 9 | +from urllib.parse import urlparse |
| 10 | + |
| 11 | +from blackduck.HubRestApi import HubInstance |
| 12 | + |
| 13 | + |
# Command-line interface.
# The text is passed as description=; the first positional parameter of
# ArgumentParser is prog, which would place the whole sentence in the usage line.
parser = argparse.ArgumentParser(
    description="Given the URL to a Github repository, find it in the Black Duck KB and return any info available")
parser.add_argument("github_url", help="The http(s) URL of the repository to look up, e.g. https://github.com/django/django")
parser.add_argument("-l", "--limit", type=int, default=100, help="The number of components to return with each call to the REST API (default: 100)")
args = parser.parse_args()

# The paging loop in this script advances its offset by --limit, so a value
# below 1 would never reach the end of the search results.
if args.limit < 1:
    parser.error("--limit must be a positive integer")

# Log to stderr so that stdout carries only the JSON summary; quiet the HTTP
# libraries, which are chatty at DEBUG level.
logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', stream=sys.stderr, level=logging.DEBUG)
logging.getLogger("requests").setLevel(logging.WARNING)
logging.getLogger("urllib3").setLevel(logging.WARNING)
| 22 | + |
def match_urls(input_url, url_from_kb):
    '''Return True if the input URL, or a close variation of it, equals the URL from the Black Duck KB.

    The variations tried are http versus https, with or without a trailing '/',
    and with or without a trailing '.git'.

    Raises an Exception if input_url does not start with "http:" or "https:".
    '''
    # Swap only the leading scheme (count=1) so that any later "http:" or
    # "https:" inside the URL, e.g. in a query string, is left untouched.
    if input_url.startswith("https:"):
        check_urls = [input_url, input_url.replace("https:", "http:", 1)]
    elif input_url.startswith("http:"):
        check_urls = [input_url, input_url.replace("http:", "https:", 1)]
    else:
        raise Exception("Unsupported scheme {}".format(urlparse(input_url).scheme))

    candidates = []
    for check_url in check_urls:
        # Reduce the URL to its bare form so that an input which already ends
        # in '/' or '.git' can still match a KB URL with a different ending.
        if check_url.endswith("/"):
            bare_url = check_url[:-1]
        elif check_url.endswith(".git"):
            bare_url = check_url[:-len(".git")]
        else:
            bare_url = check_url

        for base_url in (check_url, bare_url):
            candidates.extend([base_url + "/", base_url + ".git", base_url])

    return url_from_kb in candidates
| 42 | + |
def has_github_link(links):
    '''Return True if any of the links contains an http or https github.com URL.

    links is an iterable of strings; None or an empty iterable gives False.
    '''
    # One short-circuiting pass instead of building two intermediate lists.
    return any(
        "http://github.com" in link or "https://github.com" in link
        for link in links or ()
    )
| 47 | + |
hub = HubInstance()

#
# Get the repo name which, for now, is defined as the last "part" in the pathname to the repo
# The repo name will be used to search the Black Duck KB, e.g.
#    https://github.com/django/django --> django becomes the search keyword
# A trailing ".git" (as in a clone URL) is dropped, so
#    https://github.com/django/django.git --> django as well
#
repo_name = Path(urlparse(args.github_url).path).name
if repo_name.endswith(".git"):
    repo_name = repo_name[:-len(".git")]
if not repo_name:
    # A URL without a path (e.g. https://github.com) has no repo to search for.
    raise SystemExit(f"Could not determine a repository name from {args.github_url}")

# Components whose name and links suggest they could be the repo
possible_matches = []
# The component that has a link matching the given URL, if one is found
exact_match = None

# Paging state for the search: the real number of results is only known
# after the first response, so start with "infinitely many".
first = True
num_results_found = math.inf
offset = 0
total_hits = 0
| 64 | + |
#
# Page through the search results (aka hits) accumulating any possible matches
# and stop if/when an exact match is found.
#
# A possible match is a hit whose name equals the repo name (ignoring case) and
# that has a GitHub link. An exact match is a possible match that has a link
# matching the URL provided.
#
while offset < num_results_found:
    logging.debug(f"Searching for {repo_name}, offset {offset}, limit {args.limit}")
    parameters = {'limit': args.limit, 'offset': offset}

    search_results = hub.search_components(
        search_str_or_query=f"q={repo_name}",
        parameters=parameters)

    # Guard against a response without any 'items' so that an empty result
    # ends the search cleanly instead of failing with an IndexError/KeyError.
    result_items = search_results.get('items') or []
    if not result_items:
        logging.debug("Search response contained no items, stopping the search")
        break
    page = result_items[0]

    if first:
        # Only the first response is used to learn how many results exist.
        first = False
        num_results_found = page['searchResultStatistics']['numResultsFound']
        logging.debug(f"numResultsFound: {num_results_found}")

    for hit in page.get('hits', []):
        total_hits += 1

        fields = hit['fields']
        component_name = fields['name'][0]
        links = fields.get('links') or []

        if component_name.lower() != repo_name.lower() or not has_github_link(links):
            continue

        # The remaining fields are read only for candidates, so a hit that is
        # discarded anyway cannot abort the search because of a missing field.
        component_info = {
            'component_name': component_name,
            'component_url': hit['component'],
            'component_description': fields['description'][0],
            'links': links,
            'number_versions': int(fields['release_count'][0])
        }
        logging.info(f"Found one possible match in {component_name}")
        possible_matches.append(component_info)
        matched_urls = [link for link in links if match_urls(args.github_url, link)]

        if matched_urls:
            logging.debug(f"Found the following matched URLS: {matched_urls}")
            exact_match = component_info
            logging.debug("Found an exact match, breaking loop")
            break  # breaks from for-loop

    if exact_match:
        break  # breaks from while-loop

    offset += args.limit
| 115 | + |
# Report what the search produced: diagnostics go to the log, and the result
# itself is printed to stdout as a single JSON document.
logging.debug(f"Found {len(possible_matches)} components that could be matches after looking at {total_hits} components found in the search results")

if not exact_match:
    logging.warning("Did not find any exact match")
else:
    logging.info("Found an exact match")

# exact_match stays None (JSON null) when no link matched the given URL.
summary = dict(possible_matches=possible_matches, exact_match=exact_match)
print(json.dumps(summary))
| 128 | + |
0 commit comments