Skip to content

Commit 13e8e88

Browse files
committed
Refactor code and add tests #1763
Signed-off-by: Chin Yeung Li <[email protected]>
1 parent 114eb75 commit 13e8e88

File tree

3 files changed

+215
-11
lines changed

3 files changed

+215
-11
lines changed

scanpipe/pipelines/scan_maven_package.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,9 @@
2323
import json
2424

2525
from scanpipe.pipelines.scan_single_package import ScanSinglePackage
26-
from scanpipe.pipes.resolve import download_and_scan_pom_file
26+
from scanpipe.pipes.resolve import download_pom_files
2727
from scanpipe.pipes.resolve import get_pom_url_list
28+
from scanpipe.pipes.resolve import scan_pom_files
2829

2930

3031
class ScanMavenPackage(ScanSinglePackage):
@@ -51,7 +52,7 @@ def steps(cls):
5152
)
5253

5354
def fetch_and_scan_remote_pom(self):
54-
"""Fetch the pom.xml file from from maven.org if not present in codebase."""
55+
"""Fetch the .pom file from maven.org if not present in codebase."""
5556
with open(self.scan_output_location) as file:
5657
data = json.load(file)
5758
# Return and do nothing if data has pom.xml
@@ -61,9 +62,8 @@ def fetch_and_scan_remote_pom(self):
6162
packages = data.get("packages", [])
6263

6364
pom_url_list = get_pom_url_list(self.project.input_sources[0], packages)
64-
scanned_pom_packages, scanned_dependencies = download_and_scan_pom_file(
65-
pom_url_list
66-
)
65+
pom_file_list = download_pom_files(pom_url_list)
66+
scanned_pom_packages, scanned_dependencies = scan_pom_files(pom_file_list)
6767

6868
updated_pacakges = packages + scanned_pom_packages
6969
# Replace/Update the package and dependencies section

scanpipe/pipes/resolve.py

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -586,10 +586,13 @@ def get_pom_url_list(input_source, packages):
586586
)
587587
pom_url_list.append(pom_url)
588588
else:
589+
from urllib.parse import urlparse
590+
589591
# Check what's the input source
590592
input_source_url = input_source.get("download_url", "")
591593

592-
if input_source_url and "maven.org/" in input_source_url:
594+
parsed_url = urlparse(input_source_url)
595+
if input_source_url and parsed_url.netloc.endswith("maven.org"):
593596
base_url = input_source_url.rsplit("/", 1)[0]
594597
pom_url = (
595598
base_url + "/" + "-".join(base_url.rstrip("/").split("/")[-2:]) + ".pom"
@@ -672,17 +675,32 @@ def is_maven_pom_url(url):
672675
return False
673676

674677

675-
def download_and_scan_pom_file(pom_url_list):
678+
def download_pom_files(pom_url_list):
    """
    Download every pom file listed in ``pom_url_list``.

    Return a list of dicts, one per URL and in the same order as the input,
    each with the following keys:

    - ``pom_file_path``: path of the downloaded pom file, as a string
    - ``output_path``: ``pom_file_path`` suffixed with ``-output.json``
    - ``pom_url``: the URL the pom file was fetched from

    An empty ``pom_url_list`` yields an empty list.
    """
    pom_file_list = []
    for pom_url in pom_url_list:
        downloaded_pom = fetch.fetch_http(pom_url)
        # Convert the path once and reuse it for both derived values.
        pom_file_path = str(downloaded_pom.path)
        pom_file_list.append(
            {
                "pom_file_path": pom_file_path,
                "output_path": pom_file_path + "-output.json",
                "pom_url": pom_url,
            }
        )
    return pom_file_list
690+
691+
692+
def scan_pom_files(pom_file_list):
676693
"""Scan the downloaded pom files described in the input pom_file_list"""
677694
scanned_pom_packages = []
678695
scanned_pom_deps = []
679-
for pom_url in pom_url_list:
680-
downloaded_pom = fetch.fetch_http(pom_url)
681-
scanned_pom_output_path = str(downloaded_pom.path) + "-output.json"
696+
for pom_file_dict in pom_file_list:
697+
pom_file_path = pom_file_dict.get("pom_file_path", "")
698+
scanned_pom_output_path = pom_file_dict.get("output_path", "")
699+
pom_url = pom_file_dict.get("pom_url", "")
682700

683701
# Run a package scan on the fetched pom.xml
684702
_scanning_errors = scancode.run_scan(
685-
location=str(downloaded_pom.path),
703+
location=pom_file_path,
686704
output_file=scanned_pom_output_path,
687705
run_scan_args={
688706
"package": True,

scanpipe/tests/pipes/test_resolve.py

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,3 +400,189 @@ def test_scanpipe_resolve_parse_maven_filename(self):
400400
self.assertEqual(result3_version, expected3_version)
401401
self.assertEqual(result4_name, expected2_name)
402402
self.assertEqual(result4_version, expected2_version)
403+
404+
@mock.patch("requests.get")
def test_scanpipe_resolve_is_maven_pom_url_valid(self, mock_get):
    # A 200 answer carrying an XML <project> document is expected to be
    # reported as a pom URL.
    pom_url = "https://repo1.maven.org/maven2/example/example.pom"
    mock_get.return_value = mock.Mock(
        status_code=200,
        headers={"content-type": "application/xml"},
        text='<?xml version="1.0"?><project></project>',
    )

    is_pom = resolve.is_maven_pom_url(pom_url)

    self.assertTrue(is_pom)
416+
417+
@mock.patch("requests.get")
def test_scanpipe_resolve_is_maven_pom_url_404(self, mock_get):
    # A 404 answer is expected to be reported as "not a pom URL".
    missing_pom_url = "https://repo.maven.apache.org/maven2/example/404.pom"
    mock_get.return_value = mock.Mock(status_code=404)

    is_pom = resolve.is_maven_pom_url(missing_pom_url)

    self.assertFalse(is_pom)
427+
428+
@mock.patch("requests.get")
def test_scanpipe_resolve_is_maven_pom_url_error(self, mock_get):
    # A 200 answer that carries an HTML error page instead of a pom document
    # is expected to be rejected.
    error_page_url = "https://repo.maven.apache.org/maven2/example/error.pom"
    mock_get.return_value = mock.Mock(
        status_code=200,
        headers={"content-type": "text/html"},
        text="<html>Error page</html>",
    )

    is_pom = resolve.is_maven_pom_url(error_page_url)

    self.assertFalse(is_pom)
440+
441+
@mock.patch("scanpipe.pipes.resolve.fetch.fetch_http")
def test_scanpipe_resolve_download_pom_files(self, mock_fetch_http):
    # fetch_http is mocked, so no network access happens; only the shape of
    # the returned dict entries is checked.
    mock_fetch_http.return_value = mock.Mock(path="/safe/example1.pom")

    result = resolve.download_pom_files(
        ["https://repo1.maven.org/maven2/example/example1.pom"]
    )

    self.assertEqual(
        result,
        [
            {
                "pom_file_path": "/safe/example1.pom",
                "output_path": "/safe/example1.pom-output.json",
                "pom_url": "https://repo1.maven.org/maven2/example/example1.pom",
            }
        ],
    )
459+
460+
@mock.patch("scanpipe.pipes.resolve.scancode.run_scan")
@mock.patch("builtins.open", new_callable=mock.mock_open)
@mock.patch("json.load")
def test_scanpipe_resolve_scan_pom_files(
    self, mock_json_load, mock_open, mock_run_scan
):
    # The scan and the file access are mocked; json.load supplies the scan
    # results that scan_pom_files reads back from the output file.
    scan_results = {
        "packages": [
            {
                "name": "example-package",
                "version": "1.0.0",
                "datafile_paths": ["/safe/mock_pom.xml"],
            }
        ],
        "dependencies": [
            {
                "name": "example-dep",
                "version": "2.0.0",
                "datafile_path": "/safe/mock_pom.xml",
            }
        ],
    }
    mock_json_load.return_value = scan_results

    pom_file_entry = {
        "pom_file_path": "/safe/mock.pom",
        "output_path": "/safe/mock.pom-output.json",
        "pom_url": "https://repo1.maven.org/maven2/example/example.pom",
    }

    packages, deps = resolve.scan_pom_files([pom_file_entry])

    # The package is expected to point at the pom URL, and the dependency
    # datafile_path is expected to be emptied.
    self.assertEqual(
        packages,
        [
            {
                "name": "example-package",
                "version": "1.0.0",
                "datafile_paths": [
                    "https://repo1.maven.org/maven2/example/example.pom"
                ],
            }
        ],
    )
    self.assertEqual(
        deps,
        [{"name": "example-dep", "version": "2.0.0", "datafile_path": ""}],
    )

    mock_run_scan.assert_called_once_with(
        location="/safe/mock.pom",
        output_file="/safe/mock.pom-output.json",
        run_scan_args={"package": True},
    )
    mock_open.assert_called_once_with("/safe/mock.pom-output.json")
    mock_json_load.assert_called_once()
516+
517+
@mock.patch("scanpipe.pipes.resolve.is_maven_pom_url")
@mock.patch("scanpipe.pipes.resolve.requests.get")
def test_scanpipe_resolve_construct_pom_url_from_filename(
    self, mock_get, mock_is_maven_pom_url
):
    # Fake the search answer: a single document giving the group id.
    search_response = mock.Mock()
    search_response.raise_for_status.return_value = None
    search_response.json.return_value = {
        "response": {"docs": [{"g": "org.apache.commons"}]}
    }
    mock_get.return_value = search_response
    # Every candidate URL is accepted as a valid pom URL.
    mock_is_maven_pom_url.return_value = True

    pom_url = "https://repo1.maven.org/maven2/org/apache/commons/commons-lang3/3.12.0/commons-lang3-3.12.0.pom"

    result = resolve.construct_pom_url_from_filename("commons-lang3", "3.12.0")

    self.assertEqual(result, [pom_url])
    mock_get.assert_called_once_with(
        "https://search.maven.org/solrsearch/select?q=a:commons-lang3&wt=json",
        timeout=5,
    )
    mock_is_maven_pom_url.assert_called_once_with(pom_url)
547+
548+
def test_scanpipe_resolve_get_pom_url_list_with_packages(self):
    # With package data available and an empty input source, the pom URL is
    # expected to be derived from namespace, name and version.
    package_data = {
        "namespace": "org.apache.commons",
        "name": "commons-lang3",
        "version": "3.12.0",
    }

    result = resolve.get_pom_url_list({}, [package_data])

    self.assertEqual(
        result,
        [
            "https://repo1.maven.org/maven2/org/apache/commons/commons-lang3/3.12.0/commons-lang3-3.12.0.pom"
        ],
    )
561+
562+
def test_scanpipe_resolve_get_pom_url_list_with_maven_download_url(self):
    # Without package data, the pom URL is expected to be derived from the
    # maven.org download URL of the jar.
    jar_url = "https://repo1.maven.org/maven2/org/apache/commons/commons-lang3/3.12.0/commons-lang3-3.12.0.jar"

    result = resolve.get_pom_url_list({"download_url": jar_url}, [])

    self.assertEqual(
        result,
        [
            "https://repo1.maven.org/maven2/org/apache/commons/commons-lang3/3.12.0/commons-lang3-3.12.0.pom"
        ],
    )
571+
572+
@mock.patch("scanpipe.pipes.resolve.construct_pom_url_from_filename")
@mock.patch("scanpipe.pipes.resolve.parse_maven_filename")
def test_scanpipe_resolve_get_pom_url_list_with_jar_filename(
    self, mock_parse, mock_construct
):
    # Both helpers are mocked; the list produced by the URL construction
    # helper is expected to be returned as is.
    constructed_urls = [
        "https://repo1.maven.org/maven2/org/apache/commons/commons-lang3/3.12.0/commons-lang3-3.12.0.pom"
    ]
    mock_parse.return_value = ("commons-lang3", "3.12.0")
    mock_construct.return_value = constructed_urls

    result = resolve.get_pom_url_list({"filename": "commons-lang3-3.12.0.jar"}, [])

    self.assertEqual(result, mock_construct.return_value)
584+
585+
def test_scanpipe_resolve_get_pom_url_list_with_invalid_filename(self):
    # A non-jar filename with no package data is expected to yield no URL.
    non_jar_source = {"filename": "not-a-jar.txt"}

    self.assertEqual(resolve.get_pom_url_list(non_jar_source, []), [])

0 commit comments

Comments
 (0)