Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,13 @@ def __init__(
github_client: GitHub,
local_cache_ttl: int = 86400,
mirrors: list[MirrorSpec] | None = None,
branch_override: str | None = None,
) -> None:
super().__init__(local_cache_dir, local_cache_ttl)
self.mirrors = mirrors or []
self.github_client = github_client
self.branch_override = branch_override
self._branch_override_repo_key: tuple[str, str] | None = None
self._canonical_urls_cache: dict[str, tuple[str, str | None]] = {}
self._repository_info_cache: dict[str, tuple[int, dict[str, Any] | None]] = {}
logger.info(
Expand All @@ -119,6 +122,23 @@ def __init__(
self.local_cache_ttl,
)

def set_branch_override_target(self, package_url: str) -> None:
    """Set the target repository for the branch override.

    The branch override will only apply when get_code is called for a URL
    that resolves to the same owner/repo as the given package URL.

    Args:
        package_url: URL of the package whose repository should receive
            the branch override. It is canonicalized through
            get_canonical_urls before being parsed.

    If the canonical URL is not a valid GitHub URL, the stored target is
    left untouched and a warning is logged, so that a requested branch
    override is never dropped silently.
    """
    canonical_url, _ = self.get_canonical_urls(package_url)
    parsed = parse_git_url(canonical_url)
    if not (parsed.valid and parsed.github):
        # Without an owner/repo key the override can never match in
        # get_code, so tell the user their branch request has no effect.
        logger.warning(
            "Branch override '%s' ignored: '%s' does not resolve to a "
            "valid GitHub repository",
            self.branch_override,
            package_url,
        )
        return

    # get_code compares this (owner, repo) pair against the repository it
    # is about to fetch before substituting the override branch.
    self._branch_override_repo_key = (parsed.owner, parsed.repo)
    logger.info(
        "Branch override '%s' set for %s/%s",
        self.branch_override,
        parsed.owner,
        parsed.repo,
    )

def get_canonical_urls(self, url: str) -> tuple[str, str | None]:
"""Get the canonical repository URL and API URL for a given URL.

Expand Down Expand Up @@ -379,6 +399,17 @@ def get_code(
validated_ref = extract_ref(original_parsed_url.branch, repository_url)
if validated_ref != "":
branch = validated_ref
# Apply branch override only for the targeted repository
if (
branch == "default_branch"
and self.branch_override
and self._branch_override_repo_key
and (owner, repo) == self._branch_override_repo_key
):
branch = self.branch_override

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge: Validate branch override before shelling out to `git clone`

This assignment accepts raw `--branch` input and later feeds it into `git clone ... --branch={effective_branch}`, which is executed through `run_command` (`os.system`), so shell metacharacters in the branch value can execute arbitrary commands. The new CLI option introduces a direct user-controlled path into a shell command; validate the ref format and avoid shell-string execution for clone arguments.

Useful? React with 👍 / 👎.

logger.debug(
"Applied branch override '%s' for %s/%s", branch, owner, repo
)
if original_parsed_url.path_raw.startswith("/tree/"):
path = original_parsed_url.path_raw.removeprefix(f"/tree/{branch}")
elif original_parsed_url.path_raw.startswith("/blob/"):
Expand Down
13 changes: 12 additions & 1 deletion src/dd_license_attribution/cli/generate_sbom_csv_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,14 @@ def generate_sbom_csv(
rich_help_panel="Scanning Options",
),
] = None,
branch: Annotated[
str | None,
typer.Option(
"--branch",
help="The branch to analyze. If not provided, the default branch of the repository will be used.",
rich_help_panel="Scanning Options",
),
] = None,
) -> None:
"""
Generate a CSV report (SBOM) of third party dependencies for a given
Expand Down Expand Up @@ -437,12 +445,15 @@ def generate_sbom_csv(

try:
source_code_manager = SourceCodeManager(
cache_dir, github_client, cache_ttl, mirrors
cache_dir, github_client, cache_ttl, mirrors, branch_override=branch
)
except ValueError as e:
logger.error(str(e))
sys.exit(1)

if branch:
source_code_manager.set_branch_override_target(package)
Comment on lines +454 to +455

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge: Honor `--branch` when GitHub SBOM strategy is enabled

When `--branch` is provided, this code only configures `SourceCodeManager`, but `GitHubSbomMetadataCollectionStrategy` is still enabled and its `__get_github_generated_sbom` call fetches the SBOM by owner/repo without any branch/ref input. In repositories where the selected branch differs from the default, the run can mix default-branch SBOM dependencies with branch-specific source scans, which breaks the new CLI contract to analyze the chosen branch; consider disabling the SBOM strategy (or making it ref-aware) when `--branch` is set.

Useful? React with 👍 / 👎.


if enabled_strategies["GitHubSbomMetadataCollectionStrategy"]:
strategies.append(
GitHubSbomMetadataCollectionStrategy(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,15 @@ def _translate_github_path(self, path: str) -> str:
# Use branch cache initialized in constructor
repo_url = f"https://{parts[0]}/{parts[1]}/{parts[2]}"
if repo_url not in self._head_branch_cache:
self._head_branch_cache[repo_url] = (
output_from_command(f"git ls-remote --symref {repo_url} HEAD")
.split()[1]
.removeprefix("refs/heads/")
)
ls_remote_output = output_from_command(
f"git ls-remote --symref {repo_url} HEAD"
).split()
if len(ls_remote_output) > 1:
self._head_branch_cache[repo_url] = ls_remote_output[
1
].removeprefix("refs/heads/")
else:
return f"https://{path}"
branch = self._head_branch_cache[repo_url]
return f"{repo_url}/tree/{branch}/{parts[3]}"
return f"https://{path}"
Expand Down
Loading