Skip to content

Commit 1282ab5

Browse files
author
Glenn Snyder
committed
extending behavior of search_components and adding some examples of using it to find components
1 parent 261bf57 commit 1282ab5

File tree

4 files changed

+172
-8
lines changed

4 files changed

+172
-8
lines changed

blackduck/HubRestApi.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1391,10 +1391,17 @@ def get_components(self, limit=100, parameters={}):
13911391
response = self.execute_get(url, custom_headers=custom_headers)
13921392
return response.json()
13931393

1394-
def search_components(self, search_str, limit=100, parameters={}):
1394+
def search_components(self, search_str_or_query, limit=100, parameters=None):
    """Search for components and return the decoded JSON response.

    Args:
        search_str_or_query: Either a complete query beginning with ``q=``
            (used verbatim, so the caller is responsible for any URL
            encoding), or a plain string which is URL-quoted and searched
            for as ``q=name:<string>``.
        limit: When truthy, sent as the ``limit`` request parameter; it
            takes precedence over any ``limit`` key in ``parameters``.
        parameters: Optional dict of extra request parameters
            (e.g. ``offset``). It is copied, never modified.

    Returns:
        The value of ``.json()`` on the response from ``self.execute_get``.
    """
    # Function-scope import so the quoting below does not depend on what
    # the rest of the module happens to import.
    import urllib.parse

    # Work on a private copy: updating the argument in place would modify
    # the caller's dict and, with a shared default dict, leak a previous
    # call's limit into every later call.
    params = dict(parameters) if parameters else {}
    if limit:
        params['limit'] = limit

    if search_str_or_query.startswith("q="):
        # allow caller to override original behavior with their own query
        query = search_str_or_query
    else:
        # maintain original, somewhat flawed behavior: a name-only search,
        # with the search string quoted as the pre-existing code did
        query = "q=name:{}".format(urllib.parse.quote(search_str_or_query))

    # NOTE(review): the "{}&{}" layout assumes _get_parameter_string()
    # returns a string that already starts the query part with "?" --
    # confirm against that helper.
    parm_str = self._get_parameter_string(params)
    url = self.get_apibase() + "/search/components{}&{}".format(parm_str, query)
    response = self.execute_get(url)
    return response.json()
14001407

blackduck/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
VERSION = (0, 0, 48)
1+
VERSION = (0, 0, 49)
22

33
__version__ = '.'.join(map(str, VERSION))
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
#!/usr/bin/env python
2+
3+
import argparse
4+
import json
5+
import logging
6+
import math
7+
import sys
8+
from pathlib import Path
9+
from urllib.parse import urlparse
10+
11+
from blackduck.HubRestApi import HubInstance
12+
13+
14+
# Command line: the GitHub repository URL to look up, plus the page size
# used for each call to the search REST API.
parser = argparse.ArgumentParser("Given the URL to a Github repository, find it in the Black Duck KB and return any info available")
parser.add_argument("github_url")
parser.add_argument("-l", "--limit", type=int, default=100, help="The number of components to return with each call to the REST API (default: 100)")
args = parser.parse_args()

# Log to stderr so that stdout carries only the JSON summary printed at the
# end of the script. The format separates timestamp, level and message with
# ':' (the timestamp and level previously ran together).
logging.basicConfig(format='%(asctime)s:%(levelname)s:%(message)s', stream=sys.stderr, level=logging.DEBUG)
# Keep the HTTP libraries quiet even though this script logs at DEBUG.
logging.getLogger("requests").setLevel(logging.WARNING)
logging.getLogger("urllib3").setLevel(logging.WARNING)
23+
24+
def match_urls(input_url, url_from_kb):
    '''Return True when url_from_kb is a recognised variation of input_url.

    The variations tried are: either of the http/https schemes, combined
    with the input as given, with a '/' appended, or with '.git' appended.
    A single trailing '/' on the input itself is also tolerated, so that
    'https://host/org/repo/' still matches 'https://host/org/repo'.

    Raises ValueError (an Exception subclass) when input_url starts with
    neither 'http:' nor 'https:'.
    '''
    schemes = ("https:", "http:")
    for scheme in schemes:
        if input_url.startswith(scheme):
            # Split off only the leading scheme; a later 'http:'/'https:'
            # inside the URL (e.g. in a query string) must be left alone.
            remainder = input_url[len(scheme):]
            break
    else:
        raise ValueError("Unsupported scheme {}".format(urlparse(input_url).scheme))

    remainders = {remainder}
    if remainder.endswith("/"):
        remainders.add(remainder[:-1])

    return any(
        scheme + rest + suffix == url_from_kb
        for scheme in schemes
        for rest in remainders
        for suffix in ("", "/", ".git")
    )
43+
44+
def has_github_link(links):
    '''Return True if any entry in links contains an http or https github.com URL.'''
    # One short-circuiting pass instead of building two throwaway lists.
    return any(
        "http://github.com" in link or "https://github.com" in link
        for link in links
    )
48+
49+
hub = HubInstance()

# The paging loop below advances the offset by args.limit, so a
# non-positive page size would never terminate.
if args.limit <= 0:
    parser.error("--limit must be a positive integer")

#
# Get the repo name which, for now, is defined as the last "part" in the pathname to the repo
# The repo name will be used to search the Black Duck KB, e.g.
#   https://github.com/django/django --> django becomes the search keyword
#
repo_name = Path(urlparse(args.github_url).path).parts[-1]

possible_matches = []
exact_match = None

first = True
num_results_found = math.inf  # replaced by the real total after the first page
offset = 0
total_hits = 0

#
# Loop on the search results (aka hits) accumulating any possible matches
# and stop if/when you find an exact match.
#
# An exact match is a component from the initial search that has the exact URL provided
#
while offset < num_results_found:
    logging.debug(f"Searching for {repo_name}, offset {offset}, limit {args.limit}")
    parameters = {'limit': args.limit, 'offset': offset}

    # Pass limit explicitly: search_components applies its own `limit`
    # argument (default 100) over parameters['limit'], which would make the
    # page size disagree with the offset step used below.
    search_results = hub.search_components(
        search_str_or_query=f"q={repo_name}",
        limit=args.limit,
        parameters=parameters)

    if first:
        first = False
        num_results_found = search_results['items'][0]['searchResultStatistics']['numResultsFound']
        logging.debug(f"numResultsFound: {num_results_found}")

    for hit in search_results['items'][0].get('hits', []):
        number_versions = int(hit['fields']['release_count'][0])
        component_name = hit['fields']['name'][0]
        component_url = hit['component']
        component_description = hit['fields']['description'][0]
        links = hit['fields'].get('links', [])

        total_hits += 1

        # A candidate has the same name as the repo (case-insensitive) and
        # at least one github.com link.
        if component_name.lower() == repo_name.lower() and has_github_link(links):
            component_info = {
                'component_name': component_name,
                'component_url': component_url,
                'component_description': component_description,
                'links': links,
                'number_versions': number_versions
            }
            logging.info(f"Found one possible match in {component_name}")
            possible_matches.append(component_info)
            matched_urls = [l for l in links if match_urls(args.github_url, l)]

            if matched_urls:
                logging.debug(f"Found the following matched URLS: {matched_urls}")
                exact_match = component_info
                logging.debug("Found an exact match, breaking loop")
                break  # breaks from for-loop

    if exact_match:
        break  # breaks from while-loop

    offset += args.limit

logging.debug(f"Found {len(possible_matches)} components that could be matches after looking at {total_hits} components found in the search results")

if exact_match:
    logging.info("Found an exact match")
else:
    logging.warning("Did not find any exact match")

# The JSON summary is the only thing written to stdout.
summary = {
    'possible_matches': possible_matches,
    'exact_match': exact_match
}
print(json.dumps(summary))
131+

examples/search_components.py

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,15 @@
33
import argparse
44
import json
55
import logging
6+
import math
67
import sys
78

89
from blackduck.HubRestApi import HubInstance
910

1011
parser = argparse.ArgumentParser("A program that uses the Black Duck search API to find components...and other things")
1112
parser.add_argument("search_str")
12-
13-
parser.add_argument("-l", "--loglevel", choices=["CRITICAL", "DEBUG", "ERROR", "INFO", "WARNING"], default="DEBUG", help="Choose the desired logging level - CRITICAL, DEBUG, ERROR, INFO, or WARNING. (default: DEBUG)")
13+
parser.add_argument("-l", "--limit", type=int, default=100, help="Set the per call limit on number of results")
14+
parser.add_argument("--loglevel", choices=["CRITICAL", "DEBUG", "ERROR", "INFO", "WARNING"], default="DEBUG", help="Choose the desired logging level - CRITICAL, DEBUG, ERROR, INFO, or WARNING. (default: DEBUG)")
1415

1516
args = parser.parse_args()
1617

@@ -21,12 +22,37 @@
2122
'INFO': logging.INFO,
2223
'WARNING': logging.WARNING,
2324
}
24-
logging.basicConfig(stream=sys.stdout, format='%(threadName)s: %(asctime)s: %(levelname)s: %(message)s', level=logging_levels[args.loglevel])
25+
logging.basicConfig(stream=sys.stderr, format='%(threadName)s: %(asctime)s: %(levelname)s: %(message)s', level=logging_levels[args.loglevel])
2526
logging.getLogger("requests").setLevel(logging.WARNING)
2627
logging.getLogger("urllib3").setLevel(logging.WARNING)
2728

2829
hub = HubInstance()
2930

31+
# The loop below advances the offset by args.limit, so a non-positive
# page size would never terminate.
if args.limit <= 0:
    parser.error("--limit must be a positive integer")

offset = 0

hits = []
num_results_found = math.inf  # replaced by the real total after the first page
first = True

# Page through the search results, collecting the hits from every page.
while offset < num_results_found:
    parameters = {
        'limit': args.limit,
        'offset': offset,
    }
    # Pass limit explicitly: search_components applies its own `limit`
    # argument (default 100) over parameters['limit'], which would make the
    # page size disagree with the offset step used below.
    search_results = hub.search_components(args.search_str, limit=args.limit, parameters=parameters)
    if first:
        first = False
        num_results_found = search_results['items'][0]['searchResultStatistics']['numResultsFound']
        logging.debug(f"Search ({args.search_str}) numResultsFound: {num_results_found}")

    hits.extend(search_results['items'][0]['hits'])
    num_results_this_page = search_results['items'][0]['searchResultStatistics']['numResultsInThisPage']

    offset += args.limit
    logging.debug(f"Retrieved {num_results_this_page} out of {num_results_found}, setting offset to {offset}")

logging.debug(f"Retrieved a total of {len(hits)} hits")
print(json.dumps(hits))
56+
57+
3058

31-
search_results = hub.search_components(args.search_str)
32-
print(json.dumps(search_results))

0 commit comments

Comments
 (0)