Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions CosmoTech_Acceleration_Library/Accelerators/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +0,0 @@
# Copyright (c) Cosmo Tech corporation.
# Licensed under the MIT license.
import os
parametersPath = os.environ.get("CSM_PARAMETERS_ABSOLUTE_PATH")
70 changes: 0 additions & 70 deletions CosmoTech_Acceleration_Library/Accelerators/cosmo_api.py

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,8 @@
from cosmotech_api import TwinGraphQuery
from openpyxl import load_workbook

from CosmoTech_Acceleration_Library.Accelerators.utils.multi_environment import MultiEnvironment
from cosmotech.coal.cosmotech_api.connection import get_api_client

env = MultiEnvironment()


def get_content_from_twin_graph_data(nodes, relationships, restore_names=False):
'''
Expand Down

This file was deleted.

2 changes: 0 additions & 2 deletions CosmoTech_Acceleration_Library/Core/DataInterface/__init__.py

This file was deleted.

2 changes: 0 additions & 2 deletions CosmoTech_Acceleration_Library/Core/DataStorage/__init__.py

This file was deleted.

Empty file.
102 changes: 83 additions & 19 deletions cosmotech/coal/cli/commands/s3_bucket_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
# specifically authorized by written means by Cosmo Tech.

import pathlib
from typing import Optional

import boto3

Expand All @@ -14,11 +15,6 @@
from cosmotech.coal.utils.logger import LOGGER


def get_connection(is_client=True):
connect_function = boto3.client if is_client else boto3.resource
return connect_function('s3')


@click.command()
@click.option("--target-folder",
envvar="CSM_DATASET_ABSOLUTE_PATH",
Expand All @@ -34,28 +30,96 @@ def get_connection(is_client=True):
type=str,
show_envvar=True,
required=True)
@click.option("--prefix",
"file_prefix",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe change the name to "prefix_filter" to not confuse with a added prefix to file at upload.

envvar="CSM_DATA_BUCKET_PREFIX",
help="A prefix by which all downloaded files should start in the bucket",
metavar="PREFIX",
type=str,
show_envvar=True)
@click.option("--use-ssl/--no-ssl",
default=True,
help="Use SSL to secure connection to S3",
type=bool,
is_flag=True)
@click.option("--s3-url",
"endpoint_url",
help="URL to connect to the S3 system",
type=str,
required=True,
show_envvar=True,
metavar="URL",
envvar="AWS_ENDPOINT_URL")
@click.option("--access-id",
"access_id",
help="Identity used to connect to the S3 system",
type=str,
required=True,
show_envvar=True,
metavar="ID",
envvar="AWS_ACCESS_KEY_ID")
@click.option("--secret-key",
"secret_key",
help="Secret tied to the ID used to connect to the S3 system",
type=str,
required=True,
show_envvar=True,
metavar="ID",
envvar="AWS_SECRET_ACCESS_KEY")
@click.option("--ssl-cert-bundle",
help="Path to an alternate CA Bundle to validate SSL connections",
type=str,
show_envvar=True,
metavar="PATH",
envvar="CSM_S3_CA_BUNDLE")
@web_help("csm-data/s3-bucket-load")
def s3_bucket_load(target_folder, bucket_name):
def s3_bucket_load(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rename to s3_bucket_download to avoid confusion

target_folder: str,
bucket_name: str,
file_prefix: str,
endpoint_url: str,
access_id: str,
secret_key: str,
use_ssl: bool = True,
ssl_cert_bundle: Optional[str] = None,
):
"""Download S3 bucket content to a given folder

Make use of the default AWS/S3 configuration to access the bucket
Will download everything in the bucket unless a prefix is set, in which case only files whose key starts with the given prefix will be downloaded
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typo: at the end: the given preix will -> the given prefix will


Make use of the boto3 library to access the bucket

More information is available on this page:
[https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html)

The following environment variables can be used to configure the connection:
- `AWS_ENDPOINT_URL` : The uri pointing to the S3 service endpoint
- `AWS_ACCESS_KEY_ID` : Your access key to the service
- `AWS_SECRET_ACCESS_KEY` : The secret associated to the access key
"""
s3_resource = get_connection(False)
s3_client = get_connection()
boto3_parameters = {
"use_ssl": use_ssl,
"endpoint_url": endpoint_url,
"aws_access_key_id": access_id,
"aws_secret_access_key": secret_key,
}
if ssl_cert_bundle:
boto3_parameters["verify"] = ssl_cert_bundle

s3_resource = boto3.resource("s3",
**boto3_parameters)

bucket = s3_resource.Bucket(bucket_name)

pathlib.Path(target_folder).mkdir(parents=True, exist_ok=True)

for _file in bucket.objects.all():
LOGGER.info(f"Downloading {_file.key}")
output_file = f"{target_folder}/{_file.key}"
s3_client.download_file(bucket_name, _file.key, output_file)
remove_prefix = False
if file_prefix:
bucket_files = bucket.objects.filter(Prefix=file_prefix)
if file_prefix.endswith("/"):
remove_prefix = True
else:
bucket_files = bucket.objects.all()
for _file in bucket_files:
if not (path_name := str(_file.key)).endswith("/"):
target_file = path_name
if remove_prefix:
target_file = target_file.removeprefix(file_prefix)
output_file = f"{target_folder}/{target_file}"
pathlib.Path(output_file).parent.mkdir(parents=True,exist_ok=True)
LOGGER.info(f"Downloading {path_name} to {output_file}")
bucket.download_file(_file.key, output_file)
132 changes: 132 additions & 0 deletions cosmotech/coal/cli/commands/s3_bucket_upload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# Copyright (C) - 2023 - 2024 - Cosmo Tech
# This document and all information contained herein is the exclusive property -
# including all intellectual property rights pertaining thereto - of Cosmo Tech.
# Any use, reproduction, translation, broadcasting, transmission, distribution,
# etc., to any person is prohibited unless it has been previously and
# specifically authorized by written means by Cosmo Tech.

import pathlib
from typing import Optional

import boto3

from cosmotech.coal.cli.utils.click import click
from cosmotech.coal.cli.utils.decorators import web_help
from cosmotech.coal.utils.logger import LOGGER


@click.command()
@click.option("--source-folder",
              envvar="CSM_DATASET_ABSOLUTE_PATH",
              help="The folder/file to upload to the target bucket",
              metavar="PATH",
              type=str,
              show_envvar=True,
              required=True)
@click.option("--recursive/--no-recursive",
              default=False,
              help="Recursively send the content of every folder inside the starting folder to the bucket",
              type=bool,
              is_flag=True)
@click.option("--bucket-name",
              envvar="CSM_DATA_BUCKET_NAME",
              help="The bucket on S3 to upload to",
              metavar="BUCKET",
              type=str,
              show_envvar=True,
              required=True)
@click.option("--prefix",
              "file_prefix",
              envvar="CSM_DATA_BUCKET_PREFIX",
              help="A prefix by which all uploaded files should start with in the bucket",
              metavar="PREFIX",
              type=str,
              show_envvar=True,
              default="")
@click.option("--use-ssl/--no-ssl",
              default=True,
              help="Use SSL to secure connection to S3",
              type=bool,
              is_flag=True)
@click.option("--s3-url",
              "endpoint_url",
              help="URL to connect to the S3 system",
              type=str,
              required=True,
              show_envvar=True,
              metavar="URL",
              envvar="AWS_ENDPOINT_URL")
@click.option("--access-id",
              "access_id",
              help="Identity used to connect to the S3 system",
              type=str,
              required=True,
              show_envvar=True,
              metavar="ID",
              envvar="AWS_ACCESS_KEY_ID")
@click.option("--secret-key",
              "secret_key",
              help="Secret tied to the ID used to connect to the S3 system",
              type=str,
              required=True,
              show_envvar=True,
              metavar="ID",
              envvar="AWS_SECRET_ACCESS_KEY")
@click.option("--ssl-cert-bundle",
              help="Path to an alternate CA Bundle to validate SSL connections",
              type=str,
              show_envvar=True,
              metavar="PATH",
              envvar="CSM_S3_CA_BUNDLE")
@web_help("csm-data/s3-bucket-upload")
def s3_bucket_upload(
    source_folder: str,
    bucket_name: str,
    endpoint_url: str,
    access_id: str,
    secret_key: str,
    file_prefix: str = "",
    use_ssl: bool = True,
    ssl_cert_bundle: Optional[str] = None,
    recursive: bool = False
):
    """Upload a folder to an S3 Bucket

Will upload everything from a given folder to an S3 bucket. If a single file is passed, only that file will be uploaded and recursive will be ignored

Giving a prefix will add it to every upload (finishing the prefix with a "/" will allow to upload in a folder inside the bucket)

Make use of the boto3 library to access the bucket

More information is available on this page:
[https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html)
"""
    # NOTE: the docstring above doubles as the CLI help text, so implementation
    # details are kept in comments rather than in the docstring.
    source_path = pathlib.Path(source_folder)
    if not source_path.exists():
        LOGGER.error(f"{source_folder} does not exists")
        raise FileNotFoundError(f"{source_folder} does not exists")

    # Connection settings forwarded verbatim to boto3.client.
    boto3_parameters = {
        "use_ssl": use_ssl,
        "endpoint_url": endpoint_url,
        "aws_access_key_id": access_id,
        "aws_secret_access_key": secret_key,
    }
    # Only override certificate verification when a custom CA bundle is given,
    # otherwise boto3 keeps its default behavior.
    if ssl_cert_bundle:
        boto3_parameters["verify"] = ssl_cert_bundle

    s3_client = boto3.client("s3", **boto3_parameters)

    def file_upload(file_path: pathlib.Path, file_name: str):
        """Send one local file to the bucket under the key `file_prefix + file_name`."""
        uploaded_file_name = file_prefix + file_name
        LOGGER.info(f"Sending {file_path} as {uploaded_file_name}")
        # Pass the local path as a plain string: not every boto3 release
        # accepts path-like objects for the Filename argument.
        s3_client.upload_file(str(file_path), bucket_name, uploaded_file_name)

    if source_path.is_dir():
        for _file_path in source_path.glob("**/*" if recursive else "*"):
            if _file_path.is_file():
                # Build the key from the path relative to the source folder.
                # relative_to avoids string-prefix pitfalls (e.g. a "." source
                # folder eating the leading dot of hidden files) and as_posix
                # guarantees "/" separators in the key on every platform.
                _file_name = _file_path.relative_to(source_path).as_posix()
                file_upload(_file_path, _file_name)
    else:
        # Single file: upload it under its own name, `recursive` is irrelevant.
        file_upload(source_path, source_path.name)
Loading
Loading