Skip to content

Commit fe02bc7

Browse files
authored
Merge pull request #99 from blackducksoftware/gsnyder/search-compoonents
extending behavior of search_components and adding some examples of u…
2 parents 261bf57 + 8c6e5e0 commit fe02bc7

File tree

4 files changed

+169
-8
lines changed

4 files changed

+169
-8
lines changed

blackduck/HubRestApi.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1391,10 +1391,17 @@ def get_components(self, limit=100, parameters={}):
13911391
response = self.execute_get(url, custom_headers=custom_headers)
13921392
return response.json()
13931393

1394-
def search_components(self, search_str, limit=100, parameters={}):
1394+
def search_components(self, search_str_or_query, limit=100, parameters=None):
    """Run a component search and return the decoded JSON response.

    search_str_or_query -- either a complete query beginning with "q="
        (used verbatim), or a bare search string, which is turned into
        "q=name:<search string>".
    limit -- when truthy, stored under the 'limit' key of the request
        parameters, replacing any 'limit' entry supplied in `parameters`.
    parameters -- optional dict of additional request parameters
        (e.g. 'offset'). It is never modified.

    Returns whatever `.json()` yields for the response of `execute_get`.
    """
    # Work on a private copy: updating the argument in place would leak the
    # 'limit' entry into the caller's dict and, with a shared mutable default,
    # into every later call that omits `parameters`.
    parameters = dict(parameters) if parameters else {}
    if limit:
        parameters.update({'limit': limit})

    if search_str_or_query.startswith("q="):
        # allow caller to override original behavior with their own query
        query = search_str_or_query
    else:
        # maintain original, somewhat flawed behavior
        query = "q=name:{}".format(search_str_or_query)

    parm_str = self._get_parameter_string(parameters)
    # NOTE(review): joining with '&' assumes _get_parameter_string() returns a
    # string that already opens the query part with '?' -- confirm.
    url = self.get_apibase() + "/search/components{}&{}".format(parm_str, query)
    response = self.execute_get(url)
    return response.json()
14001407

blackduck/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
VERSION = (0, 0, 48)
1+
VERSION = (0, 0, 49)
22

33
__version__ = '.'.join(map(str, VERSION))
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
#!/usr/bin/env python
2+
3+
import argparse
4+
import json
5+
import logging
6+
import math
7+
import sys
8+
from pathlib import Path
9+
from urllib.parse import urlparse
10+
11+
from blackduck.HubRestApi import HubInstance
12+
13+
14+
parser = argparse.ArgumentParser("Given the URL to a Github repository, find it in the Black Duck KB and return any info available")
15+
parser.add_argument("github_url")
16+
parser.add_argument("-l", "--limit", type=int, default=100, help="The number of components to return with each call to the REST API (default: 100)")
17+
args = parser.parse_args()
18+
19+
logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', stream=sys.stderr, level=logging.DEBUG)
20+
logging.getLogger("requests").setLevel(logging.WARNING)
21+
logging.getLogger("urllib3").setLevel(logging.WARNING)
22+
23+
def match_urls(input_url, url_from_kb):
    '''Return True if url_from_kb equals a known variation of input_url.

    The variations tried are the input URL itself and, when it ends with one
    or more "/", the URL with those trailing slashes removed. Each of these is
    tried under both the http and https scheme, and each is compared as-is,
    with "/" appended and with ".git" appended.

    Raises ValueError when input_url starts with neither "http:" nor "https:".
    '''
    if input_url.startswith("https:"):
        scheme, other_scheme = "https:", "http:"
    elif input_url.startswith("http:"):
        scheme, other_scheme = "http:", "https:"
    else:
        raise ValueError("Unsupported scheme {}".format(urlparse(input_url).scheme))

    # Also try the URL without trailing slashes so that ".../repo/" can still
    # match ".../repo" or ".../repo.git".
    bases = [input_url]
    trimmed = input_url.rstrip("/")
    if trimmed != input_url:
        bases.append(trimmed)

    for base in bases:
        # Swap only the leading scheme; a blanket str.replace() would also
        # rewrite any later "http:"/"https:" occurring inside the path.
        swapped = other_scheme + base[len(scheme):]
        for candidate in (base, swapped):
            if url_from_kb in (candidate, candidate + "/", candidate + ".git"):
                return True
    return False
42+
43+
def has_github_link(links):
    '''Return True if any entry of links contains "http://github.com" or
    "https://github.com" as a substring, otherwise False.

    links may be any iterable of strings; it is traversed once and the scan
    stops at the first entry that qualifies.
    '''
    return any(
        "http://github.com" in link or "https://github.com" in link
        for link in links
    )
47+
48+
if args.limit <= 0:
    # The offset advances by args.limit each pass, so a non-positive page size
    # would never let the search loop below terminate.
    parser.error("--limit must be a positive integer")

hub = HubInstance()

#
# Get the repo name which, for now, is defined as the last "part" in the pathname to the repo
# The repo name will be used to search the Black Duck KB, e.g.
#   https://github.com/django/django --> django becomes the search keyword
#
repo_name = Path(urlparse(args.github_url).path).parts[-1]

possible_matches = []
exact_match = None

first = True
num_results_found = math.inf
offset = 0
total_hits = 0

#
# Loop on the search results (aka hits) accumulating any possible matches
# and stop if/when you find an exact match.
#
# An exact match is a component from the initial search that has the exact URL provided
#
while offset < num_results_found:
    logging.debug(f"Searching for {repo_name}, offset {offset}, limit {args.limit}")

    # The page size must go through the `limit` argument: search_components()
    # writes its own `limit` (default 100) over any 'limit' key placed in
    # `parameters`, which would desynchronise the page size from the offset step.
    search_results = hub.search_components(
        search_str_or_query=f"q={repo_name}",
        limit=args.limit,
        parameters={'offset': offset})

    items = search_results.get('items') or []
    if not items:
        logging.debug("Search returned no result items, stopping")
        break
    page = items[0]

    if first:
        # The total is read once, from the first page, and bounds the loop.
        first = False
        num_results_found = page['searchResultStatistics']['numResultsFound']
        logging.debug(f"numResultsFound: {num_results_found}")

    for hit in page.get('hits', []):
        number_versions = int(hit['fields']['release_count'][0])
        component_name = hit['fields']['name'][0]
        component_url = hit['component']
        component_description = hit['fields']['description'][0]
        links = hit['fields'].get('links', [])

        total_hits += 1

        # A possible match has the repo's name (case-insensitively) and at
        # least one github.com link.
        if component_name.lower() == repo_name.lower() and has_github_link(links):
            component_info = {
                'component_name': component_name,
                'component_url': component_url,
                'component_description': component_description,
                'links': links,
                'number_versions': number_versions
            }
            logging.info(f"Found one possible match in {component_name}")
            possible_matches.append(component_info)
            matched_urls = [link for link in links if match_urls(args.github_url, link)]

            if matched_urls:
                logging.debug(f"Found the following matched URLS: {matched_urls}")
                exact_match = component_info
                logging.debug("Found an exact match, breaking loop")
                break  # breaks from for-loop

    if exact_match:
        break  # breaks from while-loop

    offset += args.limit

logging.debug(f"Found {len(possible_matches)} components that could be matches after looking at {total_hits} components found in the search results")

if exact_match:
    logging.info("Found an exact match")
else:
    logging.warning("Did not find any exact match")

# Only the JSON summary goes to stdout; logging was configured to use stderr.
summary = {
    'possible_matches': possible_matches,
    'exact_match': exact_match
}
print(json.dumps(summary))
128+

examples/search_components.py

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,15 @@
33
import argparse
44
import json
55
import logging
6+
import math
67
import sys
78

89
from blackduck.HubRestApi import HubInstance
910

1011
parser = argparse.ArgumentParser("A program that uses the Black Duck search API to find components...and other things")
1112
parser.add_argument("search_str")
12-
13-
parser.add_argument("-l", "--loglevel", choices=["CRITICAL", "DEBUG", "ERROR", "INFO", "WARNING"], default="DEBUG", help="Choose the desired logging level - CRITICAL, DEBUG, ERROR, INFO, or WARNING. (default: DEBUG)")
13+
parser.add_argument("-l", "--limit", type=int, default=100, help="Set the per call limit on number of results")
14+
parser.add_argument("--loglevel", choices=["CRITICAL", "DEBUG", "ERROR", "INFO", "WARNING"], default="DEBUG", help="Choose the desired logging level - CRITICAL, DEBUG, ERROR, INFO, or WARNING. (default: DEBUG)")
1415

1516
args = parser.parse_args()
1617

@@ -21,12 +22,37 @@
2122
'INFO': logging.INFO,
2223
'WARNING': logging.WARNING,
2324
}
24-
logging.basicConfig(stream=sys.stdout, format='%(threadName)s: %(asctime)s: %(levelname)s: %(message)s', level=logging_levels[args.loglevel])
25+
logging.basicConfig(stream=sys.stderr, format='%(threadName)s: %(asctime)s: %(levelname)s: %(message)s', level=logging_levels[args.loglevel])
2526
logging.getLogger("requests").setLevel(logging.WARNING)
2627
logging.getLogger("urllib3").setLevel(logging.WARNING)
2728

2829
hub = HubInstance()
2930

31+
if args.limit <= 0:
    # The offset advances by args.limit each pass, so a non-positive page size
    # would never let the loop below terminate.
    parser.error("--limit must be a positive integer")

offset = 0

hits = []
num_results_found = math.inf
first = True

# Page through the search results, collecting every hit, until the offset
# reaches the total reported by the first page.
while offset < num_results_found:
    # The page size must go through the `limit` argument: search_components()
    # writes its own `limit` (default 100) over any 'limit' key placed in
    # `parameters`, which would desynchronise the page size from the offset step.
    search_results = hub.search_components(
        args.search_str,
        limit=args.limit,
        parameters={'offset': offset})

    items = search_results.get('items') or []
    if not items:
        logging.debug("Search returned no result items, stopping")
        break
    page = items[0]
    statistics = page['searchResultStatistics']

    if first:
        first = False
        num_results_found = statistics['numResultsFound']
        logging.debug(f"Search ({args.search_str}) numResultsFound: {num_results_found}")

    hits.extend(page.get('hits', []))
    num_results_this_page = statistics['numResultsInThisPage']

    offset += args.limit
    logging.debug(f"Retrieved {num_results_this_page} out of {num_results_found}, setting offset to {offset}")

logging.debug(f"Retreived a total of {len(hits)} hits")
print(json.dumps(hits))
56+
57+
3058

31-
search_results = hub.search_components(args.search_str)
32-
print(json.dumps(search_results))

0 commit comments

Comments
 (0)