-
Notifications
You must be signed in to change notification settings - Fork 1
Afos/io connections commands #38
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
b810af5
9107225
e068890
a108a17
a874c12
3a81b6b
a01f8f6
f4de22e
0febcd9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +0,0 @@ | ||
# Copyright (c) Cosmo Tech corporation. | ||
# Licensed under the MIT license. | ||
import os | ||
parametersPath = os.environ.get("CSM_PARAMETERS_ABSOLUTE_PATH") | ||
This file was deleted.
This file was deleted.
This file was deleted.
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,7 @@ | |
# specifically authorized by written means by Cosmo Tech. | ||
|
||
import pathlib | ||
from typing import Optional | ||
|
||
import boto3 | ||
|
||
|
@@ -14,11 +15,6 @@ | |
from cosmotech.coal.utils.logger import LOGGER | ||
|
||
|
||
def get_connection(is_client=True):
    """Build an S3 handle from boto3.

    Returns the result of ``boto3.client('s3')`` when ``is_client`` is truthy,
    otherwise the result of ``boto3.resource('s3')``. No explicit connection
    settings are passed here.
    """
    if is_client:
        return boto3.client('s3')
    return boto3.resource('s3')
|
||
|
||
@click.command() | ||
@click.option("--target-folder", | ||
envvar="CSM_DATASET_ABSOLUTE_PATH", | ||
|
@@ -34,28 +30,96 @@ def get_connection(is_client=True): | |
type=str, | ||
show_envvar=True, | ||
required=True) | ||
@click.option("--prefix", | ||
"file_prefix", | ||
envvar="CSM_DATA_BUCKET_PREFIX", | ||
help="A prefix by which all downloaded files should start in the bucket", | ||
metavar="PREFIX", | ||
type=str, | ||
show_envvar=True) | ||
@click.option("--use-ssl/--no-ssl", | ||
default=True, | ||
help="Use SSL to secure connection to S3", | ||
type=bool, | ||
is_flag=True) | ||
@click.option("--s3-url", | ||
"endpoint_url", | ||
help="URL to connect to the S3 system", | ||
type=str, | ||
required=True, | ||
show_envvar=True, | ||
metavar="URL", | ||
envvar="AWS_ENDPOINT_URL") | ||
@click.option("--access-id", | ||
"access_id", | ||
help="Identity used to connect to the S3 system", | ||
type=str, | ||
required=True, | ||
show_envvar=True, | ||
metavar="ID", | ||
envvar="AWS_ACCESS_KEY_ID") | ||
@click.option("--secret-key", | ||
"secret_key", | ||
help="Secret tied to the ID used to connect to the S3 system", | ||
type=str, | ||
required=True, | ||
show_envvar=True, | ||
metavar="ID", | ||
envvar="AWS_SECRET_ACCESS_KEY") | ||
@click.option("--ssl-cert-bundle", | ||
help="Path to an alternate CA Bundle to validate SSL connections", | ||
type=str, | ||
show_envvar=True, | ||
metavar="PATH", | ||
envvar="CSM_S3_CA_BUNDLE") | ||
@web_help("csm-data/s3-bucket-load") | ||
def s3_bucket_load(target_folder, bucket_name): | ||
def s3_bucket_load( | ||
|
||
target_folder: str, | ||
bucket_name: str, | ||
file_prefix: str, | ||
endpoint_url: str, | ||
access_id: str, | ||
secret_key: str, | ||
use_ssl: bool = True, | ||
ssl_cert_bundle: Optional[str] = None, | ||
): | ||
"""Download S3 bucket content to a given folder | ||
|
||
Make use of the default AWS/S3 configuration to access the bucket | ||
Will download everything in the bucket unless a prefix is set, in which case only files whose key starts with the given prefix will be downloaded | ||
|
||
|
||
Make use of the boto3 library to access the bucket | ||
|
||
More information is available on this page: | ||
[https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html) | ||
|
||
The following environment variables can be used to configure the connection: | ||
- `AWS_ENDPOINT_URL` : The uri pointing to the S3 service endpoint | ||
- `AWS_ACCESS_KEY_ID` : Your access key to the service | ||
- `AWS_SECRET_ACCESS_KEY` : The secret associated to the access key | ||
""" | ||
s3_resource = get_connection(False) | ||
s3_client = get_connection() | ||
boto3_parameters = { | ||
"use_ssl": use_ssl, | ||
"endpoint_url": endpoint_url, | ||
"aws_access_key_id": access_id, | ||
"aws_secret_access_key": secret_key, | ||
} | ||
if ssl_cert_bundle: | ||
boto3_parameters["verify"] = ssl_cert_bundle | ||
|
||
s3_resource = boto3.resource("s3", | ||
**boto3_parameters) | ||
|
||
bucket = s3_resource.Bucket(bucket_name) | ||
|
||
pathlib.Path(target_folder).mkdir(parents=True, exist_ok=True) | ||
|
||
for _file in bucket.objects.all(): | ||
LOGGER.info(f"Downloading {_file.key}") | ||
output_file = f"{target_folder}/{_file.key}" | ||
s3_client.download_file(bucket_name, _file.key, output_file) | ||
remove_prefix = False | ||
if file_prefix: | ||
bucket_files = bucket.objects.filter(Prefix=file_prefix) | ||
if file_prefix.endswith("/"): | ||
remove_prefix = True | ||
else: | ||
bucket_files = bucket.objects.all() | ||
for _file in bucket_files: | ||
if not (path_name := str(_file.key)).endswith("/"): | ||
target_file = path_name | ||
if remove_prefix: | ||
target_file = target_file.removeprefix(file_prefix) | ||
output_file = f"{target_folder}/{target_file}" | ||
pathlib.Path(output_file).parent.mkdir(parents=True,exist_ok=True) | ||
LOGGER.info(f"Downloading {path_name} to {output_file}") | ||
bucket.download_file(_file.key, output_file) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
# Copyright (C) - 2023 - 2024 - Cosmo Tech | ||
# This document and all information contained herein is the exclusive property - | ||
# including all intellectual property rights pertaining thereto - of Cosmo Tech. | ||
# Any use, reproduction, translation, broadcasting, transmission, distribution, | ||
# etc., to any person is prohibited unless it has been previously and | ||
# specifically authorized by written means by Cosmo Tech. | ||
|
||
import pathlib | ||
from typing import Optional | ||
|
||
import boto3 | ||
|
||
from cosmotech.coal.cli.utils.click import click | ||
from cosmotech.coal.cli.utils.decorators import web_help | ||
from cosmotech.coal.utils.logger import LOGGER | ||
|
||
|
||
@click.command()
@click.option("--source-folder",
              envvar="CSM_DATASET_ABSOLUTE_PATH",
              help="The folder/file to upload to the target bucket",
              metavar="PATH",
              type=str,
              show_envvar=True,
              required=True)
@click.option("--recursive/--no-recursive",
              default=False,
              help="Recursively send the content of every folder inside the starting folder to the bucket",
              type=bool,
              is_flag=True)
@click.option("--bucket-name",
              envvar="CSM_DATA_BUCKET_NAME",
              help="The bucket on S3 to upload to",
              metavar="BUCKET",
              type=str,
              show_envvar=True,
              required=True)
@click.option("--prefix",
              "file_prefix",
              envvar="CSM_DATA_BUCKET_PREFIX",
              help="A prefix by which all uploaded files should start with in the bucket",
              metavar="PREFIX",
              type=str,
              show_envvar=True,
              default="")
@click.option("--use-ssl/--no-ssl",
              default=True,
              help="Use SSL to secure connection to S3",
              type=bool,
              is_flag=True)
@click.option("--s3-url",
              "endpoint_url",
              help="URL to connect to the S3 system",
              type=str,
              required=True,
              show_envvar=True,
              metavar="URL",
              envvar="AWS_ENDPOINT_URL")
@click.option("--access-id",
              "access_id",
              help="Identity used to connect to the S3 system",
              type=str,
              required=True,
              show_envvar=True,
              metavar="ID",
              envvar="AWS_ACCESS_KEY_ID")
@click.option("--secret-key",
              "secret_key",
              help="Secret tied to the ID used to connect to the S3 system",
              type=str,
              required=True,
              show_envvar=True,
              metavar="ID",
              envvar="AWS_SECRET_ACCESS_KEY")
@click.option("--ssl-cert-bundle",
              help="Path to an alternate CA Bundle to validate SSL connections",
              type=str,
              show_envvar=True,
              metavar="PATH",
              envvar="CSM_S3_CA_BUNDLE")
@web_help("csm-data/s3-bucket-upload")
def s3_bucket_upload(
    source_folder: str,
    bucket_name: str,
    endpoint_url: str,
    access_id: str,
    secret_key: str,
    file_prefix: str = "",
    use_ssl: bool = True,
    ssl_cert_bundle: Optional[str] = None,
    recursive: bool = False
):
    """Upload a folder to a S3 Bucket

    Will upload everything from a given folder to a S3 bucket. If a single file is passed only it will be uploaded, and recursive will be ignored

    Giving a prefix will add it to every upload (finishing the prefix with a "/" will allow to upload in a folder inside the bucket)

    Make use of the boto3 library to access the bucket

    More information is available on this page:
    [https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html)
    """
    # NOTE(review): the docstring above is presumably rendered as the command's
    # --help text, so it is kept user-facing; parameter notes live here instead.
    #
    # source_folder   -- path of a directory or of a single file to send
    # bucket_name     -- name of the destination bucket
    # endpoint_url    -- passed to boto3 as ``endpoint_url``
    # access_id       -- passed to boto3 as ``aws_access_key_id``
    # secret_key      -- passed to boto3 as ``aws_secret_access_key``
    # file_prefix     -- string prepended verbatim to every object key
    # use_ssl         -- passed to boto3 as ``use_ssl``
    # ssl_cert_bundle -- when set, passed to boto3 as ``verify``
    # recursive       -- when true, descend into sub-directories ("**/*" glob)
    #
    # Raises FileNotFoundError when ``source_folder`` does not exist.
    source_path = pathlib.Path(source_folder)
    if not source_path.exists():
        LOGGER.error(f"{source_folder} does not exists")
        raise FileNotFoundError(f"{source_folder} does not exists")

    boto3_parameters = {
        "use_ssl": use_ssl,
        "endpoint_url": endpoint_url,
        "aws_access_key_id": access_id,
        "aws_secret_access_key": secret_key,
    }
    # Only override certificate verification when a bundle was actually given.
    if ssl_cert_bundle:
        boto3_parameters["verify"] = ssl_cert_bundle

    s3_client = boto3.client("s3", **boto3_parameters)

    def file_upload(file_path: pathlib.Path, file_name: str):
        """Send one local file to the bucket under ``file_prefix + file_name``."""
        uploaded_file_name = file_prefix + file_name
        LOGGER.info(f"Sending {file_path} as {uploaded_file_name}")
        # Pass a plain string: older boto3 releases reject path-like objects.
        s3_client.upload_file(str(file_path), bucket_name, uploaded_file_name)

    if source_path.is_dir():
        for _file_path in source_path.glob("**/*" if recursive else "*"):
            if _file_path.is_file():
                # Build the key from the path relative to the source folder.
                # String-based prefix stripping mangled hidden files when the
                # source was "." (".env" -> "env") and kept OS-specific
                # separators; as_posix() always yields "/"-separated keys.
                _file_name = _file_path.relative_to(source_path).as_posix()
                file_upload(_file_path, _file_name)
    else:
        # A single file was given: upload it under its own name.
        file_upload(source_path, source_path.name)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe rename it to "prefix_filter" so it is not confused with the prefix added to files at upload.