Skip to content

Commit 3854a85

Browse files
feat: verify whether the reported repository can be linked back to the artifact (#873)
Signed-off-by: Mohammad Abdollahpour <[email protected]>
1 parent 9ef9d50 commit 3854a85

File tree

17 files changed

+946
-1
lines changed

17 files changed

+946
-1
lines changed

src/macaron/artifact/maven.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
33

44
"""This module declares types and utilities for Maven artifacts."""
5-
5+
import re
66
from collections.abc import Sequence
77

88
from packageurl import PackageURL
@@ -140,3 +140,21 @@ def create_maven_purl_from_artifact_filename(
140140
)
141141

142142
return None
143+
144+
145+
def is_valid_maven_group_id(group_id: str) -> bool:
    """Check if the provided string is a valid maven group id.

    A group id is accepted when it consists of at least two dot-separated segments.
    Every segment must start with an ASCII letter and may continue with ASCII letters,
    digits, hyphens, or underscores. The last segment must additionally be at least
    two characters long and end with a letter or a digit.

    Parameters
    ----------
    group_id : str
        The group id to check.

    Returns
    -------
    bool
        True if the group id is valid, False otherwise.
    """
    # Should match strings like org.example.foo, org.example-2.foo.bar_1.
    segment = r"[a-zA-Z][a-zA-Z0-9_-]*"
    pattern = rf"{segment}\.(?:{segment}\.)*{segment}[a-zA-Z0-9]"
    # Use fullmatch instead of match with a "$" anchor: "$" also matches just before
    # a trailing newline, which would let "org.example\n" through.
    return re.fullmatch(pattern, group_id) is not None

src/macaron/repo_finder/repo_finder_deps_dev.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,15 @@
55
import json
66
import logging
77
from enum import StrEnum
8+
from typing import Any
89
from urllib.parse import quote as encode
910

1011
from packageurl import PackageURL
1112

1213
from macaron.json_tools import json_extract
1314
from macaron.repo_finder.repo_finder_base import BaseRepoFinder
1415
from macaron.repo_finder.repo_validator import find_valid_repository_url
16+
from macaron.slsa_analyzer.git_url import clean_url
1517
from macaron.util import send_get_http_raw
1618

1719
logger: logging.Logger = logging.getLogger(__name__)
@@ -71,6 +73,41 @@ def find_repo(self, purl: PackageURL) -> str:
7173

7274
return ""
7375

76+
@staticmethod
def get_project_info(project_url: str) -> dict[str, Any] | None:
    """Retrieve project information from deps.dev.

    Parameters
    ----------
    project_url : str
        The URL of the project.

    Returns
    -------
    dict[str, Any] | None
        The project information or None if the information could not be retrieved.
        None is also returned when the URL cannot be cleaned, when the response is
        empty or is not valid JSON, or when the decoded JSON is not an object.
    """
    clean_repo_url = clean_url(project_url)
    if clean_repo_url is None or clean_repo_url.hostname is None:
        logger.debug("Invalid project url format: %s", project_url)
        return None

    # The project key is the host followed by the path (presumably "github.com/owner/repo";
    # confirm against clean_url). It is fully percent-encoded, slashes included, so that
    # it occupies a single URL path segment.
    project_key = clean_repo_url.hostname + clean_repo_url.path

    request_url = f"https://api.deps.dev/v3alpha/projects/{encode(project_key, safe='')}"
    response = send_get_http_raw(request_url)
    if not (response and response.text):
        logger.debug("Failed to retrieve additional repo info for: %s", project_url)
        return None

    try:
        response_json = json.loads(response.text)
    except ValueError as error:
        logger.debug("Failed to parse response from deps.dev: %s", error)
        return None

    # json.loads can legally produce a list, string, number, etc.; only a JSON object
    # satisfies the declared return type.
    if not isinstance(response_json, dict):
        logger.debug("Unexpected response format from deps.dev for: %s", project_url)
        return None

    return response_json
110+
74111
def _create_urls(self, purl: PackageURL) -> list[str]:
75112
"""
76113
Create the urls to search for the metadata relating to the passed artifact.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
3+
4+
"""This package contains classes for repository verification."""
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
3+
4+
"""This module contains code to verify whether a reported repository can be linked back to the artifact."""
5+
import logging
6+
7+
from macaron.repo_verifier.repo_verifier_base import (
8+
RepositoryVerificationResult,
9+
RepositoryVerificationStatus,
10+
RepoVerifierBase,
11+
)
12+
from macaron.repo_verifier.repo_verifier_gradle import RepoVerifierGradle
13+
from macaron.repo_verifier.repo_verifier_maven import RepoVerifierMaven
14+
from macaron.slsa_analyzer.build_tool import BaseBuildTool, Gradle, Maven
15+
16+
logger = logging.getLogger(__name__)
17+
18+
19+
def verify_repo(
    namespace: str | None,
    name: str,
    version: str,
    reported_repo_url: str,
    reported_repo_fs: str,
    build_tool: BaseBuildTool,
) -> RepositoryVerificationResult:
    """Verify whether the repository links back to the artifact.

    The verifier is selected by the exact type of ``build_tool``. When no verifier is
    registered for that type, an ``UNKNOWN`` result with the reason ``unsupported_type``
    is returned without inspecting the repository.

    Parameters
    ----------
    namespace : str | None
        The namespace of the artifact.
    name : str
        The name of the artifact.
    version : str
        The version of the artifact.
    reported_repo_url : str
        The reported repository URL.
    reported_repo_fs : str
        The reported repository filesystem path.
    build_tool : BaseBuildTool
        The build tool used to build the package.

    Returns
    -------
    RepositoryVerificationResult
        The result of the repository verification.
    """
    # TODO: Add support for other build tools (Poetry, Pip, Docker, NPM, Yarn, Go).
    supported_verifiers: dict[type[BaseBuildTool], type[RepoVerifierBase]] = {
        Maven: RepoVerifierMaven,
        Gradle: RepoVerifierGradle,
    }

    tool_type = type(build_tool)
    if tool_type not in supported_verifiers:
        return RepositoryVerificationResult(
            status=RepositoryVerificationStatus.UNKNOWN,
            reason="unsupported_type",
            build_tool=build_tool,
        )

    selected_verifier = supported_verifiers[tool_type](
        namespace=namespace,
        name=name,
        version=version,
        reported_repo_url=reported_repo_url,
        reported_repo_fs=reported_repo_fs,
    )
    return selected_verifier.verify_repo()
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
3+
4+
"""This module contains the base class and core data models for repository verification."""
5+
import abc
6+
import logging
7+
import os
8+
from collections import deque
9+
from dataclasses import dataclass
10+
from enum import Enum
11+
from pathlib import Path
12+
13+
from macaron.slsa_analyzer.build_tool import BaseBuildTool
14+
15+
logger = logging.getLogger(__name__)
16+
17+
18+
def find_file_in_repo(root_dir: Path, filename: str) -> Path | None:
19+
"""Find the highest level file with a given name in a local repository.
20+
21+
This function ignores certain paths that are not under the main source code directories.
22+
23+
Parameters
24+
----------
25+
root_dir : Path
26+
The root directory of the repository.
27+
filename : str
28+
The name of the file to search for.
29+
30+
Returns
31+
-------
32+
Path | None
33+
The path to the file if it exists, otherwise
34+
"""
35+
# TODO: Consider using BaseBuildTool.get_build_dirs.
36+
# + Refactor 'get_build_dirs' to skip certain directories
37+
# that are most likely not part of the main codebase (e.g., sample).
38+
# + Need to find a way to look for other
39+
# files (e.g., gradle.properties) for the purpose of repo verification
40+
# without breaking the current logic of finding build directories.
41+
# + Add the capability to return the content/path of the file.
42+
if not os.path.isdir(root_dir):
43+
return None
44+
45+
queue: deque[Path] = deque()
46+
queue.append(Path(root_dir))
47+
while queue:
48+
current_dir = queue.popleft()
49+
50+
# Don't look through non-main directories.
51+
if any(
52+
keyword in current_dir.name.lower()
53+
for keyword in ["test", "example", "sample", "doc", "demo", "spec", "mock"]
54+
):
55+
continue
56+
57+
if Path(current_dir, filename).exists():
58+
return Path(current_dir, filename)
59+
60+
# Ignore symlinks to prevent potential infinite loop.
61+
sub_dirs = [Path(it) for it in current_dir.iterdir() if it.is_dir() and not it.is_symlink()]
62+
queue.extend(sub_dirs)
63+
64+
return None
65+
66+
67+
class RepositoryVerificationStatus(str, Enum):
    """The possible outcomes of a repository verification.

    Members are also ``str`` instances, so each one compares equal to its plain string value.
    """

    #: Evidence was found that links the repository back to the publisher of the artifact.
    PASSED = "passed"

    #: Evidence was found showing that the repository is not the publisher of the artifact.
    FAILED = "failed"

    #: No evidence was found to either prove or disprove the link between the repository and the artifact.
    UNKNOWN = "unknown"
78+
79+
80+
@dataclass(frozen=True)
class RepositoryVerificationResult:
    """An immutable record describing the outcome of a repository verification."""

    #: Whether the verification passed, failed, or was inconclusive.
    status: RepositoryVerificationStatus

    #: A short explanation of why this status was reached.
    reason: str

    #: The build tool used to build the package.
    build_tool: BaseBuildTool
92+
93+
94+
class RepoVerifierBase(abc.ABC):
    """The base class to verify whether a reported repository links back to the artifact.

    Concrete verifiers must provide ``build_tool`` and implement ``verify_repo``.
    """

    def __init__(
        self,
        namespace: str | None,
        name: str,
        version: str,
        reported_repo_url: str,
        reported_repo_fs: str,
    ):
        """Store the artifact coordinates and the location of the reported repository.

        Parameters
        ----------
        namespace : str | None
            The namespace of the artifact.
        name : str
            The name of the artifact.
        version : str
            The version of the artifact.
        reported_repo_url : str
            The URL of the repository reported by the publisher.
        reported_repo_fs : str
            The file system path of the reported repository.
        """
        self.namespace = namespace
        self.name = name
        self.version = version
        self.reported_repo_url = reported_repo_url
        self.reported_repo_fs = reported_repo_fs

    @property
    @abc.abstractmethod
    def build_tool(self) -> BaseBuildTool:
        """Define the build tool used to build the package."""

    @abc.abstractmethod
    def verify_repo(self) -> RepositoryVerificationResult:
        """Verify whether the repository links back to the artifact.

        Returns
        -------
        RepositoryVerificationResult
            The result of the repository verification.
        """

0 commit comments

Comments
 (0)