64 changes: 37 additions & 27 deletions charon/cmd/cmd_upload.py
@@ -16,12 +16,12 @@
from typing import List

from charon.config import get_config
from charon.utils.archive import detect_npm_archive, NpmArchiveType
from charon.utils.archive import detect_npm_archives, NpmArchiveType
from charon.pkgs.maven import handle_maven_uploading
from charon.pkgs.npm import handle_npm_uploading
from charon.cmd.internal import (
_decide_mode, _validate_prod_key,
_get_local_repo, _get_targets,
_get_local_repos, _get_targets,
_get_ignore_patterns, _safe_delete
)
from click import command, option, argument
@@ -35,8 +35,9 @@


@argument(
"repo",
"repos",
type=str,
nargs=-1  # allow passing multiple zip URL arguments
)
@option(
"--product",
@@ -138,7 +139,7 @@
@option("--dryrun", "-n", is_flag=True, default=False)
@command()
def upload(
repo: str,
repos: List[str],
product: str,
version: str,
targets: List[str],
@@ -152,9 +153,9 @@ def upload(
quiet=False,
dryrun=False
):
"""Upload all files from a released product REPO to Ronda
Service. The REPO points to a product released tarball which
is hosted in a remote url or a local path.
"""Upload all files from released product REPOs to Ronda
Service. The REPOs point to a product released tarballs which
are hosted in remote urls or local paths.
"""
tmp_dir = work_dir
try:
@@ -173,8 +174,8 @@
logger.error("No AWS profile specified!")
sys.exit(1)

archive_path = _get_local_repo(repo)
npm_archive_type = detect_npm_archive(archive_path)
archive_paths = _get_local_repos(repos)
archive_types = detect_npm_archives(archive_paths)
product_key = f"{product}-{version}"
manifest_bucket_name = conf.get_manifest_bucket()
targets_ = _get_targets(targets, conf)
@@ -185,31 +186,18 @@
" are set correctly.", targets_
)
sys.exit(1)
if npm_archive_type != NpmArchiveType.NOT_NPM:
logger.info("This is a npm archive")
tmp_dir, succeeded = handle_npm_uploading(
archive_path,
product_key,
targets=targets_,
aws_profile=aws_profile,
dir_=work_dir,
gen_sign=contain_signature,
cf_enable=conf.is_aws_cf_enable(),
key=sign_key,
dry_run=dryrun,
manifest_bucket_name=manifest_bucket_name
)
if not succeeded:
sys.exit(1)
else:

maven_count = archive_types.count(NpmArchiveType.NOT_NPM)
npm_count = len(archive_types) - maven_count
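# Dispatch rules: all archives maven -> multi-repo maven upload;
# exactly one npm archive -> npm upload; multiple npm archives or
# mixed maven/npm archives -> error out.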
if maven_count == len(archive_types):
ignore_patterns_list = None
if ignore_patterns:
ignore_patterns_list = ignore_patterns
else:
ignore_patterns_list = _get_ignore_patterns(conf)
logger.info("This is a maven archive")
tmp_dir, succeeded = handle_maven_uploading(
archive_path,
archive_paths,
product_key,
ignore_patterns_list,
root=root_path,
Expand All @@ -225,6 +213,28 @@ def upload(
)
if not succeeded:
sys.exit(1)
elif npm_count == len(archive_types) and len(archive_types) == 1:
logger.info("This is a npm archive")
tmp_dir, succeeded = handle_npm_uploading(
archive_paths[0],
product_key,
targets=targets_,
aws_profile=aws_profile,
dir_=work_dir,
gen_sign=contain_signature,
cf_enable=conf.is_aws_cf_enable(),
key=sign_key,
dry_run=dryrun,
manifest_bucket_name=manifest_bucket_name
)
if not succeeded:
sys.exit(1)
elif npm_count == len(archive_types) and len(archive_types) > 1:
logger.error("Doesn't support multiple upload for npm")
sys.exit(1)
else:
logger.error("Upload types are not consistent")
sys.exit(1)
except Exception:
print(traceback.format_exc())
sys.exit(2) # distinguish between exception and bad config or bad state
8 changes: 8 additions & 0 deletions charon/cmd/internal.py
@@ -75,6 +75,14 @@ def _get_local_repo(url: str) -> str:
return archive_path


def _get_local_repos(urls: list) -> list:
archive_paths = []
for url in urls:
archive_path = _get_local_repo(url)
archive_paths.append(archive_path)
return archive_paths
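
# Illustrative usage (hypothetical paths):
# _get_local_repos(["https://example.org/repo-a.zip", "/tmp/repo-b.zip"])
# resolves each entry via _get_local_repo and returns the local archive
# paths in the same order.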


def _validate_prod_key(product: str, version: str) -> bool:
if not product or product.strip() == "":
logger.error("Error: product can not be empty!")
140 changes: 136 additions & 4 deletions charon/pkgs/maven.py
@@ -32,11 +32,12 @@
META_FILE_FAILED, MAVEN_METADATA_TEMPLATE,
ARCHETYPE_CATALOG_TEMPLATE, ARCHETYPE_CATALOG_FILENAME,
PACKAGE_TYPE_MAVEN)
from typing import Dict, List, Tuple
from typing import Dict, List, Tuple, Union
from jinja2 import Template
from datetime import datetime
from zipfile import ZipFile, BadZipFile
from tempfile import mkdtemp
from shutil import rmtree, copy2
from defusedxml import ElementTree

import os
@@ -261,7 +262,7 @@ def __gen_digest_file(hash_file_path, meta_file_path: str, hashtype: HashType) -


def handle_maven_uploading(
repo: str,
repos: Union[str, List[str]],
prod_key: str,
ignore_patterns=None,
root="maven-repository",
@@ -294,8 +295,10 @@
"""
if targets is None:
targets = []
# 1. extract tarball
tmp_root = _extract_tarball(repo, prod_key, dir__=dir_)
if isinstance(repos, str):
repos = [repos]
# 1. extract tarballs
tmp_root = _extract_tarballs(repos, root, prod_key, dir__=dir_)

# 2. scan for paths and filter out the ignored paths,
# and also collect poms for later metadata generation
@@ -673,6 +676,135 @@ def _extract_tarball(repo: str, prefix="", dir__=None) -> str:
sys.exit(1)


def _extract_tarballs(repos: List[str], root: str, prefix="", dir__=None) -> str:
""" Extract multiple zip archives to a temporary directory.
* repos are the list of repo paths to extract
* root is a prefix in the tarball to identify which path is
the beginning of the maven GAV path
* prefix is the prefix for temporary directory name
* dir__ is the directory where temporary directories will be created.

Returns the path to the merged temporary directory containing all extracted files
"""
# Create final merge directory
final_tmp_root = mkdtemp(prefix=f"charon-{prefix}-final-", dir=dir__)

total_copied = 0
total_overwritten = 0
total_processed = 0

# Collect all extracted directories first
extracted_dirs = []

for repo in repos:
if os.path.exists(repo):
try:
logger.info("Extracting tarball %s", repo)
repo_zip = ZipFile(repo)
tmp_root = mkdtemp(prefix=f"charon-{prefix}-", dir=dir__)
extract_zip_all(repo_zip, tmp_root)
extracted_dirs.append(tmp_root)

except BadZipFile as e:
logger.error("Tarball extraction error: %s", e)
sys.exit(1)
else:
logger.error("Error: archive %s does not exist", repo)
sys.exit(1)

# Merge all extracted directories
if extracted_dirs:
# Collect top-level directory names from all repos for the merged directory name
top_level_merged_name_dirs = []
for extracted_dir in extracted_dirs:
for item in os.listdir(extracted_dir):
item_path = os.path.join(extracted_dir, item)
# Check that the root (maven-repository) subdirectory exists
maven_repo_path = os.path.join(item_path, root)
if os.path.isdir(item_path) and os.path.exists(maven_repo_path):
top_level_merged_name_dirs.append(item)
break

# Create merged directory name
merged_dir_name = (
"_".join(top_level_merged_name_dirs) if top_level_merged_name_dirs else "merged"
)
merged_dest_dir = os.path.join(final_tmp_root, merged_dir_name)

# Merge content from all extracted directories
for extracted_dir in extracted_dirs:
copied, overwritten, processed = _merge_directories_with_rename(
extracted_dir, merged_dest_dir, root
)
total_copied += copied
total_overwritten += overwritten
total_processed += processed

# Clean up temporary extraction directory
rmtree(extracted_dir)

logger.info(
"All zips merged! Total copied: %s, Total overwritten: %s, Total processed: %s",
total_copied,
total_overwritten,
total_processed,
)
return final_tmp_root
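
# Illustrative merge (hypothetical archive contents): repo-a.zip holding
# "product-a/maven-repository/..." and repo-b.zip holding
# "product-b/maven-repository/..." end up merged under
# "<tmp>/product-a_product-b/", with files from later archives
# overwriting same-named files from earlier ones.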


def _merge_directories_with_rename(src_dir: str, dest_dir: str, root: str):
""" Recursively copy files from src_dir to dest_dir, overwriting existing files.
* src_dir is the source directory to copy from
* dest_dir is the destination directory to copy to.

Returns Tuple of (copied_count, overwritten_count, processed_count)
"""
copied_count = 0
overwritten_count = 0
processed_count = 0

# Find the actual content directory
content_root = src_dir
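# (falls back to src_dir itself when no top-level directory
# contains the root marker)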
for item in os.listdir(src_dir):
item_path = os.path.join(src_dir, item)
# Check that the root (maven-repository) subdirectory exists
maven_repo_path = os.path.join(item_path, root)
if os.path.isdir(item_path) and os.path.exists(maven_repo_path):
content_root = item_path
break

# pylint: disable=unused-variable
for root_dir, dirs, files in os.walk(content_root):
# Calculate relative path from content root
rel_path = os.path.relpath(root_dir, content_root)
dest_root = os.path.join(dest_dir, rel_path) if rel_path != '.' else dest_dir

# Create destination directory if it doesn't exist
os.makedirs(dest_root, exist_ok=True)

# Copy all files, overwriting existing ones
for file in files:
src_file = os.path.join(root_dir, file)
dest_file = os.path.join(dest_root, file)
if os.path.exists(dest_file):
overwritten_count += 1
[inline review thread]
ligangty (Member): I don't think we should do this overwritten copy. Instead we can just mark it as "duplicated".
Author: Consider the case of two files with the same name but different content — this keeps the later arrival as the latest and lets it override. @ligangty WDYT? I also considered a checksum comparison, but it could cost too much across thousands of files, and situations that require an override are rare.
ligangty (Member, Oct 17, 2025): No need to do this, because we don't know which one is the right one. If there is overlap, let's always treat the first one as the right one.

logger.debug("Overwritten: %s -> %s", src_file, dest_file)
else:
copied_count += 1
logger.debug("Copied: %s -> %s", src_file, dest_file)

processed_count += 1
copy2(src_file, dest_file)

logger.info(
"One zip merged! Files copied: %s, Files overwritten: %s, Total files processed: %s",
copied_count,
overwritten_count,
processed_count,
)
return copied_count, overwritten_count, processed_count


def _scan_paths(files_root: str, ignore_patterns: List[str],
root: str) -> Tuple[str, List[str], List[str], List[str]]:
# 2. scan for paths and filter out the ignored paths,
13 changes: 13 additions & 0 deletions charon/utils/archive.py
@@ -182,6 +182,19 @@ def detect_npm_archive(repo):
return NpmArchiveType.NOT_NPM


def detect_npm_archives(repos):
"""Detects, if the archives need to have npm workflow.
:parameter repos list of repository directories
:return list of NpmArchiveType values
"""
results = []
for repo in repos:
result = detect_npm_archive(repo)
results.append(result)

return results
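
# Illustrative (hypothetical paths): detect_npm_archives(["client.zip",
# "frame.tgz"]) returns [NpmArchiveType.NOT_NPM, NpmArchiveType.TAR_FILE].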


def download_archive(url: str, base_dir=None) -> str:
dir_ = base_dir
if not dir_ or not os.path.isdir(dir_):
33 changes: 32 additions & 1 deletion tests/test_archive.py
@@ -1,5 +1,5 @@
from tests.base import BaseTest
from charon.utils.archive import NpmArchiveType, detect_npm_archive
from charon.utils.archive import NpmArchiveType, detect_npm_archive, detect_npm_archives
import os

from tests.constants import INPUTS
@@ -12,5 +12,36 @@ def test_detect_package(self):
npm_tarball = os.path.join(INPUTS, "code-frame-7.14.5.tgz")
self.assertEqual(NpmArchiveType.TAR_FILE, detect_npm_archive(npm_tarball))

def test_detect_packages(self):
mvn_tarballs = [
os.path.join(INPUTS, "commons-client-4.5.6.zip"),
os.path.join(INPUTS, "commons-client-4.5.9.zip")
]
archive_types = detect_npm_archives(mvn_tarballs)
self.assertEqual(2, archive_types.count(NpmArchiveType.NOT_NPM))

npm_tarball = [
os.path.join(INPUTS, "code-frame-7.14.5.tgz")
]
archive_types = detect_npm_archives(npm_tarball)
self.assertEqual(1, archive_types.count(NpmArchiveType.TAR_FILE))

npm_tarballs = [
os.path.join(INPUTS, "code-frame-7.14.5.tgz"),
os.path.join(INPUTS, "code-frame-7.15.8.tgz")
]
archive_types = detect_npm_archives(npm_tarballs)
self.assertEqual(2, archive_types.count(NpmArchiveType.TAR_FILE))

tarballs = [
os.path.join(INPUTS, "commons-client-4.5.6.zip"),
os.path.join(INPUTS, "commons-client-4.5.9.zip"),
os.path.join(INPUTS, "code-frame-7.14.5.tgz"),
os.path.join(INPUTS, "code-frame-7.15.8.tgz")
]
archive_types = detect_npm_archives(tarballs)
self.assertEqual(2, archive_types.count(NpmArchiveType.NOT_NPM))
self.assertEqual(2, archive_types.count(NpmArchiveType.TAR_FILE))

def test_download_archive(self):
pass