From 2380a67ee17604016ceb86139432730764926902 Mon Sep 17 00:00:00 2001 From: William Siqueira Date: Wed, 1 Oct 2025 17:23:50 -0300 Subject: [PATCH 1/6] RHOAIENG-18582: Post ODH Elyra release (v4.3.0) --- .../ubi9-python-3.12/setup-elyra.sh | 4 +- .../ubi9-python-3.12/Dockerfile.cpu | 3 + .../ubi9-python-3.12/utils/bootstrapper.py | 769 ------------------ .../minimal/ubi9-python-3.12/Dockerfile.cpu | 3 + .../ubi9-python-3.12/utils/bootstrapper.py | 769 ------------------ .../ubi9-python-3.12/Dockerfile.cuda | 3 + .../ubi9-python-3.12/utils/bootstrapper.py | 769 ------------------ .../pytorch/ubi9-python-3.12/Dockerfile.cuda | 3 + .../ubi9-python-3.12/utils/bootstrapper.py | 769 ------------------ .../ubi9-python-3.12/Dockerfile.rocm | 4 + .../ubi9-python-3.12/utils/bootstrapper.py | 769 ------------------ .../ubi9-python-3.12/Dockerfile.rocm | 3 + .../ubi9-python-3.12/utils/bootstrapper.py | 769 ------------------ .../ubi9-python-3.12/Dockerfile.cuda | 3 + .../ubi9-python-3.12/utils/bootstrapper.py | 769 ------------------ 15 files changed, 25 insertions(+), 5384 deletions(-) delete mode 100644 runtimes/datascience/ubi9-python-3.12/utils/bootstrapper.py delete mode 100644 runtimes/minimal/ubi9-python-3.12/utils/bootstrapper.py delete mode 100644 runtimes/pytorch+llmcompressor/ubi9-python-3.12/utils/bootstrapper.py delete mode 100644 runtimes/pytorch/ubi9-python-3.12/utils/bootstrapper.py delete mode 100644 runtimes/rocm-pytorch/ubi9-python-3.12/utils/bootstrapper.py delete mode 100644 runtimes/rocm-tensorflow/ubi9-python-3.12/utils/bootstrapper.py delete mode 100644 runtimes/tensorflow/ubi9-python-3.12/utils/bootstrapper.py diff --git a/jupyter/datascience/ubi9-python-3.12/setup-elyra.sh b/jupyter/datascience/ubi9-python-3.12/setup-elyra.sh index 551a3a8a12..c1c0ee22b6 100644 --- a/jupyter/datascience/ubi9-python-3.12/setup-elyra.sh +++ b/jupyter/datascience/ubi9-python-3.12/setup-elyra.sh @@ -20,6 +20,8 @@ if [ "$(ls -A /opt/app-root/pipeline-runtimes/)" ]; then fi # Environment vars set for accessing ssl_sa_certs and sa_token -# export PIPELINES_SSL_SA_CERTS="/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" +export KF_PIPELINES_SSL_SA_CERTS="/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" export KF_PIPELINES_SA_TOKEN_ENV="/var/run/secrets/kubernetes.io/serviceaccount/token" export KF_PIPELINES_SA_TOKEN_PATH="/var/run/secrets/kubernetes.io/serviceaccount/token" +export ELYRA_INSTALL_PACKAGES="false" +export ELYRA_GENERIC_NODES_ENABLE_SCRIPT_OUTPUT_TO_S3="false" \ No newline at end of file diff --git a/runtimes/datascience/ubi9-python-3.12/Dockerfile.cpu b/runtimes/datascience/ubi9-python-3.12/Dockerfile.cpu index 22713dff80..d2ff30e3d9 100644 --- a/runtimes/datascience/ubi9-python-3.12/Dockerfile.cpu +++ b/runtimes/datascience/ubi9-python-3.12/Dockerfile.cpu @@ -330,6 +330,9 @@ fi COPY ${DATASCIENCE_SOURCE_CODE}/pylock.toml ./ # Copy Elyra dependencies for air-gapped enviroment COPY ${DATASCIENCE_SOURCE_CODE}/utils ./utils/ +# Download Elyra boostrapper.py +RUN curl -fL https://raw.githubusercontent.com/opendatahub-io/elyra/refs/heads/main/elyra/kfp/bootstrapper.py \ + -o ./utils/bootstrapper.py RUN --mount=type=cache,target=/root/.cache/pip \ echo "Installing softwares and packages" && \ diff --git a/runtimes/datascience/ubi9-python-3.12/utils/bootstrapper.py b/runtimes/datascience/ubi9-python-3.12/utils/bootstrapper.py deleted file mode 100644 index 8009048682..0000000000 --- a/runtimes/datascience/ubi9-python-3.12/utils/bootstrapper.py +++ /dev/null @@ -1,769 +0,0 @@ -# 
Copied from: https://github.com/elyra-ai/elyra/blob/main/elyra/kfp/bootstrapper.py -# -# Copyright 2018-2023 Elyra Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from abc import ABC -from abc import abstractmethod -import glob -import json -import logging -import os -from pathlib import Path -import subprocess -import sys -from tempfile import TemporaryFile -import time -from typing import Any -from typing import Dict -from typing import Optional -from typing import Type -from typing import TypeVar -from urllib.parse import urljoin -from urllib.parse import urlparse -from urllib.parse import urlunparse - -from packaging import version - - -# Inputs and Outputs separator character. If updated, -# same-named variable in _notebook_op.py must be updated! -INOUT_SEPARATOR = ";" - -# Setup forward reference for type hint on return from class factory method. See -# https://stackoverflow.com/questions/39205527/can-you-annotate-return-type-when-value-is-instance-of-cls/39205612#39205612 -F = TypeVar("F", bound="FileOpBase") - -logger = logging.getLogger("elyra") -enable_pipeline_info = os.getenv("ELYRA_ENABLE_PIPELINE_INFO", "true").lower() == "true" -pipeline_name = None # global used in formatted logging -operation_name = None # global used in formatted logging - - -class FileOpBase(ABC): - """Abstract base class for file-based operations""" - - filepath = None - cos_client = None - cos_bucket = None - - @classmethod - def get_instance(cls: Type[F], **kwargs: Any) -> F: - """Creates an appropriate subclass instance based on the extension of the filepath (-f) argument""" - filepath = kwargs["filepath"] - if ".ipynb" in filepath: - return NotebookFileOp(**kwargs) - elif ".py" in filepath: - return PythonFileOp(**kwargs) - elif ".r" in filepath: - return RFileOp(**kwargs) - else: - raise ValueError(f"Unsupported file type: {filepath}") - - def __init__(self, **kwargs: Any) -> None: - """Initializes the FileOpBase instance""" - import minio - from minio.credentials import providers - - self.filepath = kwargs["filepath"] - self.input_params = kwargs or {} - self.cos_endpoint = urlparse(self.input_params.get("cos-endpoint")) - self.cos_bucket = self.input_params.get("cos-bucket") - - self.parameter_pass_method = self.input_params.get("parameter_pass_method") - self.pipeline_param_dict = self.convert_param_str_to_dict(self.input_params.get("pipeline_parameters")) - - # Infer secure from the endpoint's scheme. 
- self.secure = self.cos_endpoint.scheme == "https" - - # get minio credentials provider - if "cos-user" in self.input_params and "cos-password" in self.input_params: - cred_provider = providers.StaticProvider( - access_key=self.input_params.get("cos-user"), - secret_key=self.input_params.get("cos-password"), - ) - elif "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ: - cred_provider = providers.EnvAWSProvider() - elif "AWS_ROLE_ARN" in os.environ and "AWS_WEB_IDENTITY_TOKEN_FILE" in os.environ: - cred_provider = providers.IamAwsProvider() - else: - raise RuntimeError( - "No minio credentials provider can be initialised for current configs. " - "Please validate your runtime configuration details and retry." - ) - - # get minio client - self.cos_client = minio.Minio(self.cos_endpoint.netloc, secure=self.secure, credentials=cred_provider) - - @abstractmethod - def execute(self) -> None: - """Execute the operation relative to derived class""" - raise NotImplementedError("Method 'execute()' must be implemented by subclasses!") - - def process_dependencies(self) -> None: - """Process dependencies - - If a dependency archive is present, it will be downloaded from object storage - and expanded into the local directory. - - This method can be overridden by subclasses, although overrides should first - call the superclass method. - """ - OpUtil.log_operation_info("processing dependencies") - t0 = time.time() - archive_file = self.input_params.get("cos-dependencies-archive") - - self.get_file_from_object_storage(archive_file) - - inputs = self.input_params.get("inputs") - if inputs: - input_list = inputs.split(INOUT_SEPARATOR) - for file in input_list: - self.get_file_from_object_storage(file.strip()) - - subprocess.call(["tar", "-zxvf", archive_file]) - duration = time.time() - t0 - OpUtil.log_operation_info("dependencies processed", duration) - - def process_outputs(self) -> None: - """Process outputs - - If outputs have been specified, it will upload the appropriate files to object storage - - This method can be overridden by subclasses, although overrides should first - call the superclass method. - """ - OpUtil.log_operation_info("processing outputs") - t0 = time.time() - outputs = self.input_params.get("outputs") - if outputs: - output_list = outputs.split(INOUT_SEPARATOR) - for file in output_list: - self.process_output_file(file.strip()) - duration = time.time() - t0 - OpUtil.log_operation_info("outputs processed", duration) - - def process_metrics_and_metadata(self) -> None: - """Process metrics and metadata - - This method exposes metrics/metadata that the processed - notebook | script produces in the KFP UI. - - This method should not be overridden by subclasses. - """ - - OpUtil.log_operation_info("processing metrics and metadata") - t0 = time.time() - - # Location where the KFP specific output files will be stored - # in the environment where the bootsrapper is running. - # Defaults to '/tmp' if not specified. 
- output_path = Path(os.getenv("ELYRA_WRITABLE_CONTAINER_DIR", "/tmp")) - - # verify that output_path exists, is a directory - # and writable by creating a temporary file in that location - try: - with TemporaryFile(mode="w", dir=output_path) as t: - t.write("can write") - except Exception: - # output_path doesn't meet the requirements - # treat this as a non-fatal error and log a warning - logger.warning(f'Cannot create files in "{output_path}".') - OpUtil.log_operation_info("Aborted metrics and metadata processing", time.time() - t0) - return - - # Name of the proprietary KFP UI metadata file. - # Notebooks | scripts might (but don't have to) produce this file - # as documented in - # https://www.kubeflow.org/docs/pipelines/sdk/output-viewer/ - # Each ExecuteFileOp must declare this as an output file or - # the KFP UI won't pick up the information. - kfp_ui_metadata_filename = "mlpipeline-ui-metadata.json" - - # Name of the proprietary KFP metadata file. - # Notebooks | scripts might (but don't have to) produce this file - # as documented in - # https://www.kubeflow.org/docs/pipelines/sdk/pipelines-metrics/ - # Each ExecuteFileOp must declare this as an output file or - # the KFP UI won't pick up the information. - kfp_metrics_filename = "mlpipeline-metrics.json" - - # If the notebook | Python script produced one of the files - # copy it to the target location where KFP is looking for it. - for filename in [kfp_ui_metadata_filename, kfp_metrics_filename]: - try: - src = Path(".") / filename - logger.debug(f"Processing {src} ...") - # try to load the file, if one was created by the - # notebook or script - with open(src, "r") as f: - metadata = json.load(f) - - # the file exists and contains valid JSON - logger.debug(f"File content: {json.dumps(metadata)}") - - target = output_path / filename - # try to save the file in the destination location - with open(target, "w") as f: - json.dump(metadata, f) - except FileNotFoundError: - # The script | notebook didn't produce the file - # we are looking for. This is not an error condition - # that needs to be handled. - logger.debug(f"{self.filepath} produced no file named {src}") - except ValueError as ve: - # The file content could not be parsed. Log a warning - # and treat this as a non-fatal error. - logger.warning(f"Ignoring incompatible {str(src)} produced by {self.filepath}: {ve} {str(ve)}") - except Exception as ex: - # Something is wrong with the user-generated metadata file. - # Log a warning and treat this as a non-fatal error. 
- logger.warning(f"Error processing {str(src)} produced by {self.filepath}: {ex} {str(ex)}") - - # - # Augment kfp_ui_metadata_filename with Elyra-specific information: - # - link to object storage where input and output artifacts are - # stored - ui_metadata_output = output_path / kfp_ui_metadata_filename - try: - # re-load the file - with open(ui_metadata_output, "r") as f: - metadata = json.load(f) - except Exception: - # ignore all errors - metadata = {} - - # Assure the 'output' property exists and is of the correct type - if metadata.get("outputs", None) is None or not isinstance(metadata["outputs"], list): - metadata["outputs"] = [] - - # Define HREF for COS bucket: - # // - bucket_url = urljoin( - urlunparse(self.cos_endpoint), f"{self.cos_bucket}/{self.input_params.get('cos-directory', '')}/" - ) - - # add Elyra metadata to 'outputs' - metadata["outputs"].append( - { - "storage": "inline", - "source": f"## Inputs for {self.filepath}\n" - f"[{self.input_params['cos-dependencies-archive']}]({bucket_url})", - "type": "markdown", - } - ) - - # print the content of the augmented metadata file - logger.debug(f"Output UI metadata: {json.dumps(metadata)}") - - logger.debug(f"Saving UI metadata file as {ui_metadata_output} ...") - - # Save [updated] KFP UI metadata file - with open(ui_metadata_output, "w") as f: - json.dump(metadata, f) - - duration = time.time() - t0 - OpUtil.log_operation_info("metrics and metadata processed", duration) - - def get_object_storage_filename(self, filename: str) -> str: - """Function to pre-pend cloud storage working dir to file name - - :param filename: the local file - :return: the full path of the object storage file - """ - return os.path.join(self.input_params.get("cos-directory", ""), filename) - - def get_file_from_object_storage(self, file_to_get: str) -> None: - """Utility function to get files from an object storage - - :param file_to_get: filename - """ - - object_to_get = self.get_object_storage_filename(file_to_get) - t0 = time.time() - self.cos_client.fget_object(bucket_name=self.cos_bucket, object_name=object_to_get, file_path=file_to_get) - duration = time.time() - t0 - OpUtil.log_operation_info( - f"downloaded {file_to_get} from bucket: {self.cos_bucket}, object: {object_to_get}", duration - ) - - def put_file_to_object_storage(self, file_to_upload: str, object_name: Optional[str] = None) -> None: - """Utility function to put files into an object storage - - :param file_to_upload: filename - :param object_name: remote filename (used to rename) - """ - - object_to_upload = object_name - if not object_to_upload: - object_to_upload = file_to_upload - - object_to_upload = self.get_object_storage_filename(object_to_upload) - t0 = time.time() - self.cos_client.fput_object(bucket_name=self.cos_bucket, object_name=object_to_upload, file_path=file_to_upload) - duration = time.time() - t0 - OpUtil.log_operation_info( - f"uploaded {file_to_upload} to bucket: {self.cos_bucket} object: {object_to_upload}", duration - ) - - def has_wildcard(self, filename): - wildcards = ["*", "?"] - return bool(any(c in filename for c in wildcards)) - - def process_output_file(self, output_file): - """Puts the file to object storage. 
Handles wildcards and directories.""" - - matched_files = [output_file] - if self.has_wildcard(output_file): # explode the wildcarded file - matched_files = glob.glob(output_file) - - for matched_file in matched_files: - if os.path.isdir(matched_file): - for file in os.listdir(matched_file): - self.process_output_file(os.path.join(matched_file, file)) - else: - self.put_file_to_object_storage(matched_file) - - def convert_param_str_to_dict(self, pipeline_parameters: Optional[str] = None) -> Dict[str, Any]: - """Convert INOUT-separated string of pipeline parameters into a dictionary.""" - parameter_dict = {} - if pipeline_parameters: - parameter_list = pipeline_parameters.split(INOUT_SEPARATOR) - for parameter in parameter_list: - param_name, value = parameter.split("=", 1) - if self.parameter_pass_method == "env" and (not value or not isinstance(value, str)): - continue # env vars must be non-empty strings - parameter_dict[param_name] = value - return parameter_dict - - def set_parameters_in_env(self) -> None: - """Make pipeline parameters available as environment variables.""" - for name, value in self.pipeline_param_dict.items(): - if name in os.environ: - continue # avoid overwriting env vars with the same name - os.environ[name] = value - - -class NotebookFileOp(FileOpBase): - """Perform Notebook File Operation""" - - def execute(self) -> None: - """Execute the Notebook and upload results to object storage""" - notebook = os.path.basename(self.filepath) - notebook_name = notebook.replace(".ipynb", "") - notebook_output = f"{notebook_name}-output.ipynb" - notebook_html = f"{notebook_name}.html" - - try: - OpUtil.log_operation_info(f"executing notebook using 'papermill {notebook} {notebook_output}'") - t0 = time.time() - # Include kernel selection in execution time - kernel_name = NotebookFileOp.find_best_kernel(notebook) - - kwargs = {} - if self.parameter_pass_method == "env": - self.set_parameters_in_env() - - import papermill - - papermill.execute_notebook(notebook, notebook_output, kernel_name=kernel_name, **kwargs) - duration = time.time() - t0 - OpUtil.log_operation_info("notebook execution completed", duration) - - NotebookFileOp.convert_notebook_to_html(notebook_output, notebook_html) - self.put_file_to_object_storage(notebook_output, notebook) - self.put_file_to_object_storage(notebook_html) - self.process_outputs() - except Exception as ex: - # log in case of errors - logger.error(f"Unexpected error: {sys.exc_info()[0]}") - - NotebookFileOp.convert_notebook_to_html(notebook_output, notebook_html) - self.put_file_to_object_storage(notebook_output, notebook) - self.put_file_to_object_storage(notebook_html) - raise ex - - @staticmethod - def convert_notebook_to_html(notebook_file: str, html_file: str) -> str: - """Function to convert a Jupyter notebook file (.ipynb) into an html file - - :param notebook_file: object storage client - :param html_file: name of what the html output file should be - :return: html_file: the converted notebook in html format - """ - import nbconvert - import nbformat - - OpUtil.log_operation_info(f"converting from {notebook_file} to {html_file}") - t0 = time.time() - nb = nbformat.read(notebook_file, as_version=4) - html_exporter = nbconvert.HTMLExporter() - data, resources = html_exporter.from_notebook_node(nb) - with open(html_file, "w") as f: - f.write(data) - f.close() - - duration = time.time() - t0 - OpUtil.log_operation_info(f"{notebook_file} converted to {html_file}", duration) - return html_file - - @staticmethod - def 
find_best_kernel(notebook_file: str) -> str: - """Determines the best kernel to use via the following algorithm: - - 1. Loads notebook and gets kernel_name and kernel_language from NB metadata. - 2. Gets the list of configured kernels using KernelSpecManager. - 3. If notebook kernel_name is in list, use that, else - 4. If not found, load each configured kernel.json file and find a language match. - 5. On first match, log info message regarding the switch and use that kernel. - 6. If no language match is found, revert to notebook kernel and log warning message. - """ - from jupyter_client.kernelspec import KernelSpecManager - import nbformat - - nb = nbformat.read(notebook_file, 4) - - nb_kspec = nb.metadata.kernelspec - nb_kernel_name = nb_kspec.get("name") - nb_kernel_lang = nb_kspec.get("language") - - kernel_specs = KernelSpecManager().find_kernel_specs() - - # see if we have a direct match... - if nb_kernel_name in kernel_specs.keys(): - return nb_kernel_name - - # no match found for kernel, try matching language... - for name, file in kernel_specs.items(): - # load file (JSON) and pick out language, if match, use first found - with open(os.path.join(file, "kernel.json")) as f: - kspec = json.load(f) - if kspec.get("language").lower() == nb_kernel_lang.lower(): - matched_kernel = os.path.basename(file) - logger.info( - f"Matched kernel by language ({nb_kernel_lang}), using kernel " - f"'{matched_kernel}' instead of the missing kernel '{nb_kernel_name}'." - ) - return matched_kernel - - # no match found for language, return notebook kernel and let execution fail - logger.warning( - f"Reverting back to missing notebook kernel '{nb_kernel_name}' since no " - f"language match ({nb_kernel_lang}) was found in current kernel specifications." - ) - return nb_kernel_name - - -class PythonFileOp(FileOpBase): - """Perform Python File Operation""" - - def execute(self) -> None: - """Execute the Python script and upload results to object storage""" - python_script = os.path.basename(self.filepath) - python_script_name = python_script.replace(".py", "") - # python_script_output = f"{python_script_name}.log" - - try: - OpUtil.log_operation_info( - f"executing python script using 'python3 {python_script}'" - ) - t0 = time.time() - - run_args = ["python3", python_script] - if self.parameter_pass_method == "env": - self.set_parameters_in_env() - - logger.info("----------------------Python logs start----------------------") - # Removing support for the s3 storage of python script logs - # with open(python_script_output, "w") as log_file: - # process = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - process = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - - for line in iter(process.stdout.readline, b''): - sys.stdout.write(line.decode()) - - process.stdout.close() - return_code = process.wait() - logger.info("----------------------Python logs ends----------------------") - if return_code: - raise subprocess.CalledProcessError(return_code, run_args) - duration = time.time() - t0 - OpUtil.log_operation_info("python script execution completed", duration) - - # self.put_file_to_object_storage(python_script_output, python_script_output) - self.process_outputs() - except Exception as ex: - # log in case of errors - logger.error(f"Unexpected error: {sys.exc_info()[0]}") - logger.error(f"Error details: {ex}") - - # self.put_file_to_object_storage(python_script_output, python_script_output) - raise ex - - -class RFileOp(FileOpBase): - """Perform R File 
Operation""" - - def execute(self) -> None: - """Execute the R script and upload results to object storage""" - r_script = os.path.basename(self.filepath) - r_script_name = r_script.replace(".r", "") - # r_script_output = f"{r_script_name}.log" - - try: - OpUtil.log_operation_info(f"executing R script using 'Rscript {r_script}'") - t0 = time.time() - - run_args = ["Rscript", r_script] - if self.parameter_pass_method == "env": - self.set_parameters_in_env() - - logger.info("----------------------R script logs start----------------------") - # Removing support for the s3 storage of R script logs - # with open(r_script_output, "w") as log_file: - # process = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - process = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - - for line in iter(process.stdout.readline, b''): - sys.stdout.write(line.decode()) - - process.stdout.close() - return_code = process.wait() - logger.info("----------------------R script logs ends----------------------") - if return_code: - raise subprocess.CalledProcessError(return_code, run_args) - - duration = time.time() - t0 - OpUtil.log_operation_info("R script execution completed", duration) - - # self.put_file_to_object_storage(r_script_output, r_script_output) - self.process_outputs() - except Exception as ex: - # log in case of errors - logger.error(f"Unexpected error: {sys.exc_info()[0]}") - logger.error(f"Error details: {ex}") - - # self.put_file_to_object_storage(r_script_output, r_script_output) - raise ex - - -class OpUtil(object): - """Utility functions for preparing file execution.""" - - @classmethod - def package_install(cls, user_volume_path) -> None: - OpUtil.log_operation_info("Installing packages") - t0 = time.time() - requirements_file = cls.determine_elyra_requirements() - elyra_packages = cls.package_list_to_dict(requirements_file) - current_packages = cls.package_list_to_dict("requirements-current.txt") - to_install_list = [] - - for package, ver in elyra_packages.items(): - if package in current_packages: - if current_packages[package] is None: - logger.warning( - f"WARNING: Source package '{package}' found already installed as an " - "editable package. This may conflict with the required version: " - f"{ver} . Skipping..." - ) - continue - try: - version.Version(current_packages[package]) - except version.InvalidVersion: # current version is not PEP-440 compliant - logger.warning( - f"WARNING: Source package '{package}' found already installed from " - f"{current_packages[package]}. This may conflict with the required " - f"version: {ver} . Skipping..." - ) - continue - if version.Version(ver) > version.Version(current_packages[package]): - logger.info(f"Updating {package} package from version {current_packages[package]} to {ver}...") - to_install_list.append(f"{package}=={ver}") - elif version.Version(ver) < version.Version(current_packages[package]): - logger.info( - f"Newer {package} package with version {current_packages[package]} " - f"already installed. Skipping..." - ) - else: - logger.info(f"Package not found. 
Installing {package} package with version {ver}...") - to_install_list.append(f"{package}=={ver}") - - if to_install_list: - if user_volume_path: - to_install_list.insert(0, f"--target={user_volume_path}") - to_install_list.append("--no-cache-dir") - - subprocess.run([sys.executable, "-m", "pip", "install"] + to_install_list, check=True) - - if user_volume_path: - os.environ["PIP_CONFIG_FILE"] = f"{user_volume_path}/pip.conf" - - subprocess.run([sys.executable, "-m", "pip", "freeze"]) - duration = time.time() - t0 - OpUtil.log_operation_info("Packages installed", duration) - - @classmethod - def determine_elyra_requirements(cls) -> Any: - if sys.version_info.major == 3: - if sys.version_info.minor in [8, 9, 10, 11]: - return "requirements-elyra.txt" - logger.error( - f"This version of Python '{sys.version_info.major}.{sys.version_info.minor}' " - f"is not supported for Elyra generic components" - ) - return None - - @classmethod - def package_list_to_dict(cls, filename: str) -> dict: - package_dict = {} - with open(filename) as fh: - for line in fh: - if line[0] != "#": - if " @ " in line: - package_name, package_version = line.strip("\n").split(sep=" @ ") - elif "===" in line: - package_name, package_version = line.strip("\n").split(sep="===") - elif "==" in line: - package_name, package_version = line.strip("\n").split(sep="==") - elif line.startswith("-e ") or line.startswith("--editable "): - package_name = line.strip("\n").replace("-e ", "").replace("--editable ", "") - if "#egg=" in package_name: # editable package from version control system - package_name = package_name.split("=")[-1] - elif "/" in package_name: # editable package from local directory - package_name = os.path.basename(package_name) - package_version = None - else: - # Tolerate other formats but do not add to package list - continue - - package_dict[package_name] = package_version - - return package_dict - - @classmethod - def parse_arguments(cls, args) -> dict: - import argparse - - global pipeline_name, operation_name - - logger.debug("Parsing Arguments.....") - parser = argparse.ArgumentParser() - parser.add_argument( - "-e", "--cos-endpoint", dest="cos-endpoint", help="Cloud object storage endpoint", required=True - ) - parser.add_argument( - "-b", "--cos-bucket", dest="cos-bucket", help="Cloud object storage bucket to use", required=True - ) - parser.add_argument( - "-d", - "--cos-directory", - dest="cos-directory", - help="Working directory in cloud object storage bucket to use", - required=True, - ) - parser.add_argument( - "-t", - "--cos-dependencies-archive", - dest="cos-dependencies-archive", - help="Archive containing notebook and dependency artifacts", - required=True, - ) - parser.add_argument("-f", "--file", dest="filepath", help="File to execute", required=True) - parser.add_argument("-o", "--outputs", dest="outputs", help="Files to output to object store", required=False) - parser.add_argument("-i", "--inputs", dest="inputs", help="Files to pull in from parent node", required=False) - parser.add_argument( - "-p", - "--user-volume-path", - dest="user-volume-path", - help="Directory in Volume to install python libraries into", - required=False, - ) - parser.add_argument( - "-n", - "--pipeline-name", - dest="pipeline-name", - help="Pipeline name", - required=True, - ) - parser.add_argument( - "-r", - "--pipeline-parameters", - dest="pipeline_parameters", - help="Pipeline parameters that apply to this node", - required=False, - ) - parser.add_argument( - "-m", - "--parameter-pass-method", - 
dest="parameter_pass_method", - choices=["env"], - help="The method by which pipeline parameters should be applied to this node.", - required=False, - ) - parsed_args = vars(parser.parse_args(args)) - - # set pipeline name as global - pipeline_name = parsed_args.get("pipeline-name") - # operation/node name is the basename of the non-suffixed filepath, set as global - operation_name = os.path.basename(os.path.splitext(parsed_args.get("filepath"))[0]) - - return parsed_args - - @classmethod - def log_operation_info(cls, action_clause: str, duration_secs: Optional[float] = None) -> None: - """Produces a formatted log INFO message used entirely for support purposes. - - This method is intended to be called for any entries that should be captured across aggregated - log files to identify steps within a given pipeline and each of its operations. As a result, - calls to this method should produce single-line entries in the log (no embedded newlines). - Each entry is prefixed with the pipeline name. - - General logging should NOT use this method but use logger.() statements directly. - - :param action_clause: str representing the action that is being logged - :param duration_secs: optional float value representing the duration of the action being logged - """ - global pipeline_name, operation_name - if enable_pipeline_info: - duration_clause = f"({duration_secs:.3f} secs)" if duration_secs else "" - logger.info(f"'{pipeline_name}':'{operation_name}' - {action_clause} {duration_clause}") - - -def main(): - # Configure logger format, level - logging.basicConfig( - format="[%(levelname)1.1s %(asctime)s.%(msecs).03d] %(message)s", datefmt="%H:%M:%S", level=logging.DEBUG - ) - # Setup packages and gather arguments - input_params = OpUtil.parse_arguments(sys.argv[1:]) - OpUtil.log_operation_info("starting operation") - t0 = time.time() - - # Create the appropriate instance, process dependencies and execute the operation - file_op = FileOpBase.get_instance(**input_params) - - file_op.process_dependencies() - - file_op.execute() - - # Process notebook | script metrics and KFP UI metadata - file_op.process_metrics_and_metadata() - - duration = time.time() - t0 - OpUtil.log_operation_info("operation completed", duration) - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/runtimes/minimal/ubi9-python-3.12/Dockerfile.cpu b/runtimes/minimal/ubi9-python-3.12/Dockerfile.cpu index 18f56a95e2..4b9ede4d11 100644 --- a/runtimes/minimal/ubi9-python-3.12/Dockerfile.cpu +++ b/runtimes/minimal/ubi9-python-3.12/Dockerfile.cpu @@ -73,6 +73,9 @@ WORKDIR /opt/app-root/bin COPY ${MINIMAL_SOURCE_CODE}/pylock.toml ./ # Copy Elyra dependencies for air-gapped enviroment COPY ${MINIMAL_SOURCE_CODE}/utils ./utils/ +# Download Elyra boostrapper.py +RUN curl -fL https://raw.githubusercontent.com/opendatahub-io/elyra/refs/heads/main/elyra/kfp/bootstrapper.py \ + -o ./utils/bootstrapper.py RUN echo "Installing softwares and packages" && \ # This may have to download and compile some dependencies, and as we don't lock requirements from `build-system.requires`, diff --git a/runtimes/minimal/ubi9-python-3.12/utils/bootstrapper.py b/runtimes/minimal/ubi9-python-3.12/utils/bootstrapper.py deleted file mode 100644 index 8009048682..0000000000 --- a/runtimes/minimal/ubi9-python-3.12/utils/bootstrapper.py +++ /dev/null @@ -1,769 +0,0 @@ -# Copied from: https://github.com/elyra-ai/elyra/blob/main/elyra/kfp/bootstrapper.py -# -# Copyright 2018-2023 Elyra Authors -# -# Licensed under the Apache License, Version 2.0 
(the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from abc import ABC -from abc import abstractmethod -import glob -import json -import logging -import os -from pathlib import Path -import subprocess -import sys -from tempfile import TemporaryFile -import time -from typing import Any -from typing import Dict -from typing import Optional -from typing import Type -from typing import TypeVar -from urllib.parse import urljoin -from urllib.parse import urlparse -from urllib.parse import urlunparse - -from packaging import version - - -# Inputs and Outputs separator character. If updated, -# same-named variable in _notebook_op.py must be updated! -INOUT_SEPARATOR = ";" - -# Setup forward reference for type hint on return from class factory method. See -# https://stackoverflow.com/questions/39205527/can-you-annotate-return-type-when-value-is-instance-of-cls/39205612#39205612 -F = TypeVar("F", bound="FileOpBase") - -logger = logging.getLogger("elyra") -enable_pipeline_info = os.getenv("ELYRA_ENABLE_PIPELINE_INFO", "true").lower() == "true" -pipeline_name = None # global used in formatted logging -operation_name = None # global used in formatted logging - - -class FileOpBase(ABC): - """Abstract base class for file-based operations""" - - filepath = None - cos_client = None - cos_bucket = None - - @classmethod - def get_instance(cls: Type[F], **kwargs: Any) -> F: - """Creates an appropriate subclass instance based on the extension of the filepath (-f) argument""" - filepath = kwargs["filepath"] - if ".ipynb" in filepath: - return NotebookFileOp(**kwargs) - elif ".py" in filepath: - return PythonFileOp(**kwargs) - elif ".r" in filepath: - return RFileOp(**kwargs) - else: - raise ValueError(f"Unsupported file type: {filepath}") - - def __init__(self, **kwargs: Any) -> None: - """Initializes the FileOpBase instance""" - import minio - from minio.credentials import providers - - self.filepath = kwargs["filepath"] - self.input_params = kwargs or {} - self.cos_endpoint = urlparse(self.input_params.get("cos-endpoint")) - self.cos_bucket = self.input_params.get("cos-bucket") - - self.parameter_pass_method = self.input_params.get("parameter_pass_method") - self.pipeline_param_dict = self.convert_param_str_to_dict(self.input_params.get("pipeline_parameters")) - - # Infer secure from the endpoint's scheme. - self.secure = self.cos_endpoint.scheme == "https" - - # get minio credentials provider - if "cos-user" in self.input_params and "cos-password" in self.input_params: - cred_provider = providers.StaticProvider( - access_key=self.input_params.get("cos-user"), - secret_key=self.input_params.get("cos-password"), - ) - elif "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ: - cred_provider = providers.EnvAWSProvider() - elif "AWS_ROLE_ARN" in os.environ and "AWS_WEB_IDENTITY_TOKEN_FILE" in os.environ: - cred_provider = providers.IamAwsProvider() - else: - raise RuntimeError( - "No minio credentials provider can be initialised for current configs. " - "Please validate your runtime configuration details and retry." 
- ) - - # get minio client - self.cos_client = minio.Minio(self.cos_endpoint.netloc, secure=self.secure, credentials=cred_provider) - - @abstractmethod - def execute(self) -> None: - """Execute the operation relative to derived class""" - raise NotImplementedError("Method 'execute()' must be implemented by subclasses!") - - def process_dependencies(self) -> None: - """Process dependencies - - If a dependency archive is present, it will be downloaded from object storage - and expanded into the local directory. - - This method can be overridden by subclasses, although overrides should first - call the superclass method. - """ - OpUtil.log_operation_info("processing dependencies") - t0 = time.time() - archive_file = self.input_params.get("cos-dependencies-archive") - - self.get_file_from_object_storage(archive_file) - - inputs = self.input_params.get("inputs") - if inputs: - input_list = inputs.split(INOUT_SEPARATOR) - for file in input_list: - self.get_file_from_object_storage(file.strip()) - - subprocess.call(["tar", "-zxvf", archive_file]) - duration = time.time() - t0 - OpUtil.log_operation_info("dependencies processed", duration) - - def process_outputs(self) -> None: - """Process outputs - - If outputs have been specified, it will upload the appropriate files to object storage - - This method can be overridden by subclasses, although overrides should first - call the superclass method. - """ - OpUtil.log_operation_info("processing outputs") - t0 = time.time() - outputs = self.input_params.get("outputs") - if outputs: - output_list = outputs.split(INOUT_SEPARATOR) - for file in output_list: - self.process_output_file(file.strip()) - duration = time.time() - t0 - OpUtil.log_operation_info("outputs processed", duration) - - def process_metrics_and_metadata(self) -> None: - """Process metrics and metadata - - This method exposes metrics/metadata that the processed - notebook | script produces in the KFP UI. - - This method should not be overridden by subclasses. - """ - - OpUtil.log_operation_info("processing metrics and metadata") - t0 = time.time() - - # Location where the KFP specific output files will be stored - # in the environment where the bootsrapper is running. - # Defaults to '/tmp' if not specified. - output_path = Path(os.getenv("ELYRA_WRITABLE_CONTAINER_DIR", "/tmp")) - - # verify that output_path exists, is a directory - # and writable by creating a temporary file in that location - try: - with TemporaryFile(mode="w", dir=output_path) as t: - t.write("can write") - except Exception: - # output_path doesn't meet the requirements - # treat this as a non-fatal error and log a warning - logger.warning(f'Cannot create files in "{output_path}".') - OpUtil.log_operation_info("Aborted metrics and metadata processing", time.time() - t0) - return - - # Name of the proprietary KFP UI metadata file. - # Notebooks | scripts might (but don't have to) produce this file - # as documented in - # https://www.kubeflow.org/docs/pipelines/sdk/output-viewer/ - # Each ExecuteFileOp must declare this as an output file or - # the KFP UI won't pick up the information. - kfp_ui_metadata_filename = "mlpipeline-ui-metadata.json" - - # Name of the proprietary KFP metadata file. - # Notebooks | scripts might (but don't have to) produce this file - # as documented in - # https://www.kubeflow.org/docs/pipelines/sdk/pipelines-metrics/ - # Each ExecuteFileOp must declare this as an output file or - # the KFP UI won't pick up the information. 
- kfp_metrics_filename = "mlpipeline-metrics.json" - - # If the notebook | Python script produced one of the files - # copy it to the target location where KFP is looking for it. - for filename in [kfp_ui_metadata_filename, kfp_metrics_filename]: - try: - src = Path(".") / filename - logger.debug(f"Processing {src} ...") - # try to load the file, if one was created by the - # notebook or script - with open(src, "r") as f: - metadata = json.load(f) - - # the file exists and contains valid JSON - logger.debug(f"File content: {json.dumps(metadata)}") - - target = output_path / filename - # try to save the file in the destination location - with open(target, "w") as f: - json.dump(metadata, f) - except FileNotFoundError: - # The script | notebook didn't produce the file - # we are looking for. This is not an error condition - # that needs to be handled. - logger.debug(f"{self.filepath} produced no file named {src}") - except ValueError as ve: - # The file content could not be parsed. Log a warning - # and treat this as a non-fatal error. - logger.warning(f"Ignoring incompatible {str(src)} produced by {self.filepath}: {ve} {str(ve)}") - except Exception as ex: - # Something is wrong with the user-generated metadata file. - # Log a warning and treat this as a non-fatal error. - logger.warning(f"Error processing {str(src)} produced by {self.filepath}: {ex} {str(ex)}") - - # - # Augment kfp_ui_metadata_filename with Elyra-specific information: - # - link to object storage where input and output artifacts are - # stored - ui_metadata_output = output_path / kfp_ui_metadata_filename - try: - # re-load the file - with open(ui_metadata_output, "r") as f: - metadata = json.load(f) - except Exception: - # ignore all errors - metadata = {} - - # Assure the 'output' property exists and is of the correct type - if metadata.get("outputs", None) is None or not isinstance(metadata["outputs"], list): - metadata["outputs"] = [] - - # Define HREF for COS bucket: - # // - bucket_url = urljoin( - urlunparse(self.cos_endpoint), f"{self.cos_bucket}/{self.input_params.get('cos-directory', '')}/" - ) - - # add Elyra metadata to 'outputs' - metadata["outputs"].append( - { - "storage": "inline", - "source": f"## Inputs for {self.filepath}\n" - f"[{self.input_params['cos-dependencies-archive']}]({bucket_url})", - "type": "markdown", - } - ) - - # print the content of the augmented metadata file - logger.debug(f"Output UI metadata: {json.dumps(metadata)}") - - logger.debug(f"Saving UI metadata file as {ui_metadata_output} ...") - - # Save [updated] KFP UI metadata file - with open(ui_metadata_output, "w") as f: - json.dump(metadata, f) - - duration = time.time() - t0 - OpUtil.log_operation_info("metrics and metadata processed", duration) - - def get_object_storage_filename(self, filename: str) -> str: - """Function to pre-pend cloud storage working dir to file name - - :param filename: the local file - :return: the full path of the object storage file - """ - return os.path.join(self.input_params.get("cos-directory", ""), filename) - - def get_file_from_object_storage(self, file_to_get: str) -> None: - """Utility function to get files from an object storage - - :param file_to_get: filename - """ - - object_to_get = self.get_object_storage_filename(file_to_get) - t0 = time.time() - self.cos_client.fget_object(bucket_name=self.cos_bucket, object_name=object_to_get, file_path=file_to_get) - duration = time.time() - t0 - OpUtil.log_operation_info( - f"downloaded {file_to_get} from bucket: {self.cos_bucket}, object: 
{object_to_get}", duration - ) - - def put_file_to_object_storage(self, file_to_upload: str, object_name: Optional[str] = None) -> None: - """Utility function to put files into an object storage - - :param file_to_upload: filename - :param object_name: remote filename (used to rename) - """ - - object_to_upload = object_name - if not object_to_upload: - object_to_upload = file_to_upload - - object_to_upload = self.get_object_storage_filename(object_to_upload) - t0 = time.time() - self.cos_client.fput_object(bucket_name=self.cos_bucket, object_name=object_to_upload, file_path=file_to_upload) - duration = time.time() - t0 - OpUtil.log_operation_info( - f"uploaded {file_to_upload} to bucket: {self.cos_bucket} object: {object_to_upload}", duration - ) - - def has_wildcard(self, filename): - wildcards = ["*", "?"] - return bool(any(c in filename for c in wildcards)) - - def process_output_file(self, output_file): - """Puts the file to object storage. Handles wildcards and directories.""" - - matched_files = [output_file] - if self.has_wildcard(output_file): # explode the wildcarded file - matched_files = glob.glob(output_file) - - for matched_file in matched_files: - if os.path.isdir(matched_file): - for file in os.listdir(matched_file): - self.process_output_file(os.path.join(matched_file, file)) - else: - self.put_file_to_object_storage(matched_file) - - def convert_param_str_to_dict(self, pipeline_parameters: Optional[str] = None) -> Dict[str, Any]: - """Convert INOUT-separated string of pipeline parameters into a dictionary.""" - parameter_dict = {} - if pipeline_parameters: - parameter_list = pipeline_parameters.split(INOUT_SEPARATOR) - for parameter in parameter_list: - param_name, value = parameter.split("=", 1) - if self.parameter_pass_method == "env" and (not value or not isinstance(value, str)): - continue # env vars must be non-empty strings - parameter_dict[param_name] = value - return parameter_dict - - def set_parameters_in_env(self) -> None: - """Make pipeline parameters available as environment variables.""" - for name, value in self.pipeline_param_dict.items(): - if name in os.environ: - continue # avoid overwriting env vars with the same name - os.environ[name] = value - - -class NotebookFileOp(FileOpBase): - """Perform Notebook File Operation""" - - def execute(self) -> None: - """Execute the Notebook and upload results to object storage""" - notebook = os.path.basename(self.filepath) - notebook_name = notebook.replace(".ipynb", "") - notebook_output = f"{notebook_name}-output.ipynb" - notebook_html = f"{notebook_name}.html" - - try: - OpUtil.log_operation_info(f"executing notebook using 'papermill {notebook} {notebook_output}'") - t0 = time.time() - # Include kernel selection in execution time - kernel_name = NotebookFileOp.find_best_kernel(notebook) - - kwargs = {} - if self.parameter_pass_method == "env": - self.set_parameters_in_env() - - import papermill - - papermill.execute_notebook(notebook, notebook_output, kernel_name=kernel_name, **kwargs) - duration = time.time() - t0 - OpUtil.log_operation_info("notebook execution completed", duration) - - NotebookFileOp.convert_notebook_to_html(notebook_output, notebook_html) - self.put_file_to_object_storage(notebook_output, notebook) - self.put_file_to_object_storage(notebook_html) - self.process_outputs() - except Exception as ex: - # log in case of errors - logger.error(f"Unexpected error: {sys.exc_info()[0]}") - - NotebookFileOp.convert_notebook_to_html(notebook_output, notebook_html) - 
self.put_file_to_object_storage(notebook_output, notebook) - self.put_file_to_object_storage(notebook_html) - raise ex - - @staticmethod - def convert_notebook_to_html(notebook_file: str, html_file: str) -> str: - """Function to convert a Jupyter notebook file (.ipynb) into an html file - - :param notebook_file: object storage client - :param html_file: name of what the html output file should be - :return: html_file: the converted notebook in html format - """ - import nbconvert - import nbformat - - OpUtil.log_operation_info(f"converting from {notebook_file} to {html_file}") - t0 = time.time() - nb = nbformat.read(notebook_file, as_version=4) - html_exporter = nbconvert.HTMLExporter() - data, resources = html_exporter.from_notebook_node(nb) - with open(html_file, "w") as f: - f.write(data) - f.close() - - duration = time.time() - t0 - OpUtil.log_operation_info(f"{notebook_file} converted to {html_file}", duration) - return html_file - - @staticmethod - def find_best_kernel(notebook_file: str) -> str: - """Determines the best kernel to use via the following algorithm: - - 1. Loads notebook and gets kernel_name and kernel_language from NB metadata. - 2. Gets the list of configured kernels using KernelSpecManager. - 3. If notebook kernel_name is in list, use that, else - 4. If not found, load each configured kernel.json file and find a language match. - 5. On first match, log info message regarding the switch and use that kernel. - 6. If no language match is found, revert to notebook kernel and log warning message. - """ - from jupyter_client.kernelspec import KernelSpecManager - import nbformat - - nb = nbformat.read(notebook_file, 4) - - nb_kspec = nb.metadata.kernelspec - nb_kernel_name = nb_kspec.get("name") - nb_kernel_lang = nb_kspec.get("language") - - kernel_specs = KernelSpecManager().find_kernel_specs() - - # see if we have a direct match... - if nb_kernel_name in kernel_specs.keys(): - return nb_kernel_name - - # no match found for kernel, try matching language... - for name, file in kernel_specs.items(): - # load file (JSON) and pick out language, if match, use first found - with open(os.path.join(file, "kernel.json")) as f: - kspec = json.load(f) - if kspec.get("language").lower() == nb_kernel_lang.lower(): - matched_kernel = os.path.basename(file) - logger.info( - f"Matched kernel by language ({nb_kernel_lang}), using kernel " - f"'{matched_kernel}' instead of the missing kernel '{nb_kernel_name}'." - ) - return matched_kernel - - # no match found for language, return notebook kernel and let execution fail - logger.warning( - f"Reverting back to missing notebook kernel '{nb_kernel_name}' since no " - f"language match ({nb_kernel_lang}) was found in current kernel specifications." 
- ) - return nb_kernel_name - - -class PythonFileOp(FileOpBase): - """Perform Python File Operation""" - - def execute(self) -> None: - """Execute the Python script and upload results to object storage""" - python_script = os.path.basename(self.filepath) - python_script_name = python_script.replace(".py", "") - # python_script_output = f"{python_script_name}.log" - - try: - OpUtil.log_operation_info( - f"executing python script using 'python3 {python_script}'" - ) - t0 = time.time() - - run_args = ["python3", python_script] - if self.parameter_pass_method == "env": - self.set_parameters_in_env() - - logger.info("----------------------Python logs start----------------------") - # Removing support for the s3 storage of python script logs - # with open(python_script_output, "w") as log_file: - # process = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - process = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - - for line in iter(process.stdout.readline, b''): - sys.stdout.write(line.decode()) - - process.stdout.close() - return_code = process.wait() - logger.info("----------------------Python logs ends----------------------") - if return_code: - raise subprocess.CalledProcessError(return_code, run_args) - duration = time.time() - t0 - OpUtil.log_operation_info("python script execution completed", duration) - - # self.put_file_to_object_storage(python_script_output, python_script_output) - self.process_outputs() - except Exception as ex: - # log in case of errors - logger.error(f"Unexpected error: {sys.exc_info()[0]}") - logger.error(f"Error details: {ex}") - - # self.put_file_to_object_storage(python_script_output, python_script_output) - raise ex - - -class RFileOp(FileOpBase): - """Perform R File Operation""" - - def execute(self) -> None: - """Execute the R script and upload results to object storage""" - r_script = os.path.basename(self.filepath) - r_script_name = r_script.replace(".r", "") - # r_script_output = f"{r_script_name}.log" - - try: - OpUtil.log_operation_info(f"executing R script using 'Rscript {r_script}'") - t0 = time.time() - - run_args = ["Rscript", r_script] - if self.parameter_pass_method == "env": - self.set_parameters_in_env() - - logger.info("----------------------R script logs start----------------------") - # Removing support for the s3 storage of R script logs - # with open(r_script_output, "w") as log_file: - # process = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - process = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - - for line in iter(process.stdout.readline, b''): - sys.stdout.write(line.decode()) - - process.stdout.close() - return_code = process.wait() - logger.info("----------------------R script logs ends----------------------") - if return_code: - raise subprocess.CalledProcessError(return_code, run_args) - - duration = time.time() - t0 - OpUtil.log_operation_info("R script execution completed", duration) - - # self.put_file_to_object_storage(r_script_output, r_script_output) - self.process_outputs() - except Exception as ex: - # log in case of errors - logger.error(f"Unexpected error: {sys.exc_info()[0]}") - logger.error(f"Error details: {ex}") - - # self.put_file_to_object_storage(r_script_output, r_script_output) - raise ex - - -class OpUtil(object): - """Utility functions for preparing file execution.""" - - @classmethod - def package_install(cls, user_volume_path) -> None: - OpUtil.log_operation_info("Installing packages") 
- t0 = time.time() - requirements_file = cls.determine_elyra_requirements() - elyra_packages = cls.package_list_to_dict(requirements_file) - current_packages = cls.package_list_to_dict("requirements-current.txt") - to_install_list = [] - - for package, ver in elyra_packages.items(): - if package in current_packages: - if current_packages[package] is None: - logger.warning( - f"WARNING: Source package '{package}' found already installed as an " - "editable package. This may conflict with the required version: " - f"{ver} . Skipping..." - ) - continue - try: - version.Version(current_packages[package]) - except version.InvalidVersion: # current version is not PEP-440 compliant - logger.warning( - f"WARNING: Source package '{package}' found already installed from " - f"{current_packages[package]}. This may conflict with the required " - f"version: {ver} . Skipping..." - ) - continue - if version.Version(ver) > version.Version(current_packages[package]): - logger.info(f"Updating {package} package from version {current_packages[package]} to {ver}...") - to_install_list.append(f"{package}=={ver}") - elif version.Version(ver) < version.Version(current_packages[package]): - logger.info( - f"Newer {package} package with version {current_packages[package]} " - f"already installed. Skipping..." - ) - else: - logger.info(f"Package not found. Installing {package} package with version {ver}...") - to_install_list.append(f"{package}=={ver}") - - if to_install_list: - if user_volume_path: - to_install_list.insert(0, f"--target={user_volume_path}") - to_install_list.append("--no-cache-dir") - - subprocess.run([sys.executable, "-m", "pip", "install"] + to_install_list, check=True) - - if user_volume_path: - os.environ["PIP_CONFIG_FILE"] = f"{user_volume_path}/pip.conf" - - subprocess.run([sys.executable, "-m", "pip", "freeze"]) - duration = time.time() - t0 - OpUtil.log_operation_info("Packages installed", duration) - - @classmethod - def determine_elyra_requirements(cls) -> Any: - if sys.version_info.major == 3: - if sys.version_info.minor in [8, 9, 10, 11]: - return "requirements-elyra.txt" - logger.error( - f"This version of Python '{sys.version_info.major}.{sys.version_info.minor}' " - f"is not supported for Elyra generic components" - ) - return None - - @classmethod - def package_list_to_dict(cls, filename: str) -> dict: - package_dict = {} - with open(filename) as fh: - for line in fh: - if line[0] != "#": - if " @ " in line: - package_name, package_version = line.strip("\n").split(sep=" @ ") - elif "===" in line: - package_name, package_version = line.strip("\n").split(sep="===") - elif "==" in line: - package_name, package_version = line.strip("\n").split(sep="==") - elif line.startswith("-e ") or line.startswith("--editable "): - package_name = line.strip("\n").replace("-e ", "").replace("--editable ", "") - if "#egg=" in package_name: # editable package from version control system - package_name = package_name.split("=")[-1] - elif "/" in package_name: # editable package from local directory - package_name = os.path.basename(package_name) - package_version = None - else: - # Tolerate other formats but do not add to package list - continue - - package_dict[package_name] = package_version - - return package_dict - - @classmethod - def parse_arguments(cls, args) -> dict: - import argparse - - global pipeline_name, operation_name - - logger.debug("Parsing Arguments.....") - parser = argparse.ArgumentParser() - parser.add_argument( - "-e", "--cos-endpoint", dest="cos-endpoint", help="Cloud object 
storage endpoint", required=True - ) - parser.add_argument( - "-b", "--cos-bucket", dest="cos-bucket", help="Cloud object storage bucket to use", required=True - ) - parser.add_argument( - "-d", - "--cos-directory", - dest="cos-directory", - help="Working directory in cloud object storage bucket to use", - required=True, - ) - parser.add_argument( - "-t", - "--cos-dependencies-archive", - dest="cos-dependencies-archive", - help="Archive containing notebook and dependency artifacts", - required=True, - ) - parser.add_argument("-f", "--file", dest="filepath", help="File to execute", required=True) - parser.add_argument("-o", "--outputs", dest="outputs", help="Files to output to object store", required=False) - parser.add_argument("-i", "--inputs", dest="inputs", help="Files to pull in from parent node", required=False) - parser.add_argument( - "-p", - "--user-volume-path", - dest="user-volume-path", - help="Directory in Volume to install python libraries into", - required=False, - ) - parser.add_argument( - "-n", - "--pipeline-name", - dest="pipeline-name", - help="Pipeline name", - required=True, - ) - parser.add_argument( - "-r", - "--pipeline-parameters", - dest="pipeline_parameters", - help="Pipeline parameters that apply to this node", - required=False, - ) - parser.add_argument( - "-m", - "--parameter-pass-method", - dest="parameter_pass_method", - choices=["env"], - help="The method by which pipeline parameters should be applied to this node.", - required=False, - ) - parsed_args = vars(parser.parse_args(args)) - - # set pipeline name as global - pipeline_name = parsed_args.get("pipeline-name") - # operation/node name is the basename of the non-suffixed filepath, set as global - operation_name = os.path.basename(os.path.splitext(parsed_args.get("filepath"))[0]) - - return parsed_args - - @classmethod - def log_operation_info(cls, action_clause: str, duration_secs: Optional[float] = None) -> None: - """Produces a formatted log INFO message used entirely for support purposes. - - This method is intended to be called for any entries that should be captured across aggregated - log files to identify steps within a given pipeline and each of its operations. As a result, - calls to this method should produce single-line entries in the log (no embedded newlines). - Each entry is prefixed with the pipeline name. - - General logging should NOT use this method but use logger.() statements directly. 
-
- :param action_clause: str representing the action that is being logged
- :param duration_secs: optional float value representing the duration of the action being logged
- """
- global pipeline_name, operation_name
- if enable_pipeline_info:
- duration_clause = f"({duration_secs:.3f} secs)" if duration_secs else ""
- logger.info(f"'{pipeline_name}':'{operation_name}' - {action_clause} {duration_clause}")
-
-
-def main():
- # Configure logger format, level
- logging.basicConfig(
- format="[%(levelname)1.1s %(asctime)s.%(msecs).03d] %(message)s", datefmt="%H:%M:%S", level=logging.DEBUG
- )
- # Setup packages and gather arguments
- input_params = OpUtil.parse_arguments(sys.argv[1:])
- OpUtil.log_operation_info("starting operation")
- t0 = time.time()
-
- # Create the appropriate instance, process dependencies and execute the operation
- file_op = FileOpBase.get_instance(**input_params)
-
- file_op.process_dependencies()
-
- file_op.execute()
-
- # Process notebook | script metrics and KFP UI metadata
- file_op.process_metrics_and_metadata()
-
- duration = time.time() - t0
- OpUtil.log_operation_info("operation completed", duration)
-
-
-if __name__ == "__main__":
- main()
\ No newline at end of file
diff --git a/runtimes/pytorch+llmcompressor/ubi9-python-3.12/Dockerfile.cuda b/runtimes/pytorch+llmcompressor/ubi9-python-3.12/Dockerfile.cuda
index 55a2a195c9..035980a4aa 100644
--- a/runtimes/pytorch+llmcompressor/ubi9-python-3.12/Dockerfile.cuda
+++ b/runtimes/pytorch+llmcompressor/ubi9-python-3.12/Dockerfile.cuda
@@ -68,6 +68,9 @@ WORKDIR /opt/app-root/bin
COPY ${PYTORCH_SOURCE_CODE}/pylock.toml ./
# Copy Elyra dependencies for air-gapped enviroment
COPY ${PYTORCH_SOURCE_CODE}/utils ./utils/
+# Download Elyra bootstrapper.py
+RUN curl -fL https://raw.githubusercontent.com/opendatahub-io/elyra/refs/heads/main/elyra/kfp/bootstrapper.py \
+ -o ./utils/bootstrapper.py
RUN echo "Installing softwares and packages" && \
# This may have to download and compile some dependencies, and as we don't lock requirements from `build-system.requires`,
diff --git a/runtimes/pytorch+llmcompressor/ubi9-python-3.12/utils/bootstrapper.py b/runtimes/pytorch+llmcompressor/ubi9-python-3.12/utils/bootstrapper.py
deleted file mode 100644
index 8009048682..0000000000
--- a/runtimes/pytorch+llmcompressor/ubi9-python-3.12/utils/bootstrapper.py
+++ /dev/null
@@ -1,769 +0,0 @@
-# Copied from: https://github.com/elyra-ai/elyra/blob/main/elyra/kfp/bootstrapper.py
-#
-# Copyright 2018-2023 Elyra Authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# -from abc import ABC -from abc import abstractmethod -import glob -import json -import logging -import os -from pathlib import Path -import subprocess -import sys -from tempfile import TemporaryFile -import time -from typing import Any -from typing import Dict -from typing import Optional -from typing import Type -from typing import TypeVar -from urllib.parse import urljoin -from urllib.parse import urlparse -from urllib.parse import urlunparse - -from packaging import version - - -# Inputs and Outputs separator character. If updated, -# same-named variable in _notebook_op.py must be updated! -INOUT_SEPARATOR = ";" - -# Setup forward reference for type hint on return from class factory method. See -# https://stackoverflow.com/questions/39205527/can-you-annotate-return-type-when-value-is-instance-of-cls/39205612#39205612 -F = TypeVar("F", bound="FileOpBase") - -logger = logging.getLogger("elyra") -enable_pipeline_info = os.getenv("ELYRA_ENABLE_PIPELINE_INFO", "true").lower() == "true" -pipeline_name = None # global used in formatted logging -operation_name = None # global used in formatted logging - - -class FileOpBase(ABC): - """Abstract base class for file-based operations""" - - filepath = None - cos_client = None - cos_bucket = None - - @classmethod - def get_instance(cls: Type[F], **kwargs: Any) -> F: - """Creates an appropriate subclass instance based on the extension of the filepath (-f) argument""" - filepath = kwargs["filepath"] - if ".ipynb" in filepath: - return NotebookFileOp(**kwargs) - elif ".py" in filepath: - return PythonFileOp(**kwargs) - elif ".r" in filepath: - return RFileOp(**kwargs) - else: - raise ValueError(f"Unsupported file type: {filepath}") - - def __init__(self, **kwargs: Any) -> None: - """Initializes the FileOpBase instance""" - import minio - from minio.credentials import providers - - self.filepath = kwargs["filepath"] - self.input_params = kwargs or {} - self.cos_endpoint = urlparse(self.input_params.get("cos-endpoint")) - self.cos_bucket = self.input_params.get("cos-bucket") - - self.parameter_pass_method = self.input_params.get("parameter_pass_method") - self.pipeline_param_dict = self.convert_param_str_to_dict(self.input_params.get("pipeline_parameters")) - - # Infer secure from the endpoint's scheme. - self.secure = self.cos_endpoint.scheme == "https" - - # get minio credentials provider - if "cos-user" in self.input_params and "cos-password" in self.input_params: - cred_provider = providers.StaticProvider( - access_key=self.input_params.get("cos-user"), - secret_key=self.input_params.get("cos-password"), - ) - elif "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ: - cred_provider = providers.EnvAWSProvider() - elif "AWS_ROLE_ARN" in os.environ and "AWS_WEB_IDENTITY_TOKEN_FILE" in os.environ: - cred_provider = providers.IamAwsProvider() - else: - raise RuntimeError( - "No minio credentials provider can be initialised for current configs. " - "Please validate your runtime configuration details and retry." - ) - - # get minio client - self.cos_client = minio.Minio(self.cos_endpoint.netloc, secure=self.secure, credentials=cred_provider) - - @abstractmethod - def execute(self) -> None: - """Execute the operation relative to derived class""" - raise NotImplementedError("Method 'execute()' must be implemented by subclasses!") - - def process_dependencies(self) -> None: - """Process dependencies - - If a dependency archive is present, it will be downloaded from object storage - and expanded into the local directory. 
- - This method can be overridden by subclasses, although overrides should first - call the superclass method. - """ - OpUtil.log_operation_info("processing dependencies") - t0 = time.time() - archive_file = self.input_params.get("cos-dependencies-archive") - - self.get_file_from_object_storage(archive_file) - - inputs = self.input_params.get("inputs") - if inputs: - input_list = inputs.split(INOUT_SEPARATOR) - for file in input_list: - self.get_file_from_object_storage(file.strip()) - - subprocess.call(["tar", "-zxvf", archive_file]) - duration = time.time() - t0 - OpUtil.log_operation_info("dependencies processed", duration) - - def process_outputs(self) -> None: - """Process outputs - - If outputs have been specified, it will upload the appropriate files to object storage - - This method can be overridden by subclasses, although overrides should first - call the superclass method. - """ - OpUtil.log_operation_info("processing outputs") - t0 = time.time() - outputs = self.input_params.get("outputs") - if outputs: - output_list = outputs.split(INOUT_SEPARATOR) - for file in output_list: - self.process_output_file(file.strip()) - duration = time.time() - t0 - OpUtil.log_operation_info("outputs processed", duration) - - def process_metrics_and_metadata(self) -> None: - """Process metrics and metadata - - This method exposes metrics/metadata that the processed - notebook | script produces in the KFP UI. - - This method should not be overridden by subclasses. - """ - - OpUtil.log_operation_info("processing metrics and metadata") - t0 = time.time() - - # Location where the KFP specific output files will be stored - # in the environment where the bootsrapper is running. - # Defaults to '/tmp' if not specified. - output_path = Path(os.getenv("ELYRA_WRITABLE_CONTAINER_DIR", "/tmp")) - - # verify that output_path exists, is a directory - # and writable by creating a temporary file in that location - try: - with TemporaryFile(mode="w", dir=output_path) as t: - t.write("can write") - except Exception: - # output_path doesn't meet the requirements - # treat this as a non-fatal error and log a warning - logger.warning(f'Cannot create files in "{output_path}".') - OpUtil.log_operation_info("Aborted metrics and metadata processing", time.time() - t0) - return - - # Name of the proprietary KFP UI metadata file. - # Notebooks | scripts might (but don't have to) produce this file - # as documented in - # https://www.kubeflow.org/docs/pipelines/sdk/output-viewer/ - # Each ExecuteFileOp must declare this as an output file or - # the KFP UI won't pick up the information. - kfp_ui_metadata_filename = "mlpipeline-ui-metadata.json" - - # Name of the proprietary KFP metadata file. - # Notebooks | scripts might (but don't have to) produce this file - # as documented in - # https://www.kubeflow.org/docs/pipelines/sdk/pipelines-metrics/ - # Each ExecuteFileOp must declare this as an output file or - # the KFP UI won't pick up the information. - kfp_metrics_filename = "mlpipeline-metrics.json" - - # If the notebook | Python script produced one of the files - # copy it to the target location where KFP is looking for it. 
- for filename in [kfp_ui_metadata_filename, kfp_metrics_filename]: - try: - src = Path(".") / filename - logger.debug(f"Processing {src} ...") - # try to load the file, if one was created by the - # notebook or script - with open(src, "r") as f: - metadata = json.load(f) - - # the file exists and contains valid JSON - logger.debug(f"File content: {json.dumps(metadata)}") - - target = output_path / filename - # try to save the file in the destination location - with open(target, "w") as f: - json.dump(metadata, f) - except FileNotFoundError: - # The script | notebook didn't produce the file - # we are looking for. This is not an error condition - # that needs to be handled. - logger.debug(f"{self.filepath} produced no file named {src}") - except ValueError as ve: - # The file content could not be parsed. Log a warning - # and treat this as a non-fatal error. - logger.warning(f"Ignoring incompatible {str(src)} produced by {self.filepath}: {ve} {str(ve)}") - except Exception as ex: - # Something is wrong with the user-generated metadata file. - # Log a warning and treat this as a non-fatal error. - logger.warning(f"Error processing {str(src)} produced by {self.filepath}: {ex} {str(ex)}") - - # - # Augment kfp_ui_metadata_filename with Elyra-specific information: - # - link to object storage where input and output artifacts are - # stored - ui_metadata_output = output_path / kfp_ui_metadata_filename - try: - # re-load the file - with open(ui_metadata_output, "r") as f: - metadata = json.load(f) - except Exception: - # ignore all errors - metadata = {} - - # Assure the 'output' property exists and is of the correct type - if metadata.get("outputs", None) is None or not isinstance(metadata["outputs"], list): - metadata["outputs"] = [] - - # Define HREF for COS bucket: - # // - bucket_url = urljoin( - urlunparse(self.cos_endpoint), f"{self.cos_bucket}/{self.input_params.get('cos-directory', '')}/" - ) - - # add Elyra metadata to 'outputs' - metadata["outputs"].append( - { - "storage": "inline", - "source": f"## Inputs for {self.filepath}\n" - f"[{self.input_params['cos-dependencies-archive']}]({bucket_url})", - "type": "markdown", - } - ) - - # print the content of the augmented metadata file - logger.debug(f"Output UI metadata: {json.dumps(metadata)}") - - logger.debug(f"Saving UI metadata file as {ui_metadata_output} ...") - - # Save [updated] KFP UI metadata file - with open(ui_metadata_output, "w") as f: - json.dump(metadata, f) - - duration = time.time() - t0 - OpUtil.log_operation_info("metrics and metadata processed", duration) - - def get_object_storage_filename(self, filename: str) -> str: - """Function to pre-pend cloud storage working dir to file name - - :param filename: the local file - :return: the full path of the object storage file - """ - return os.path.join(self.input_params.get("cos-directory", ""), filename) - - def get_file_from_object_storage(self, file_to_get: str) -> None: - """Utility function to get files from an object storage - - :param file_to_get: filename - """ - - object_to_get = self.get_object_storage_filename(file_to_get) - t0 = time.time() - self.cos_client.fget_object(bucket_name=self.cos_bucket, object_name=object_to_get, file_path=file_to_get) - duration = time.time() - t0 - OpUtil.log_operation_info( - f"downloaded {file_to_get} from bucket: {self.cos_bucket}, object: {object_to_get}", duration - ) - - def put_file_to_object_storage(self, file_to_upload: str, object_name: Optional[str] = None) -> None: - """Utility function to put files into an object 
storage - - :param file_to_upload: filename - :param object_name: remote filename (used to rename) - """ - - object_to_upload = object_name - if not object_to_upload: - object_to_upload = file_to_upload - - object_to_upload = self.get_object_storage_filename(object_to_upload) - t0 = time.time() - self.cos_client.fput_object(bucket_name=self.cos_bucket, object_name=object_to_upload, file_path=file_to_upload) - duration = time.time() - t0 - OpUtil.log_operation_info( - f"uploaded {file_to_upload} to bucket: {self.cos_bucket} object: {object_to_upload}", duration - ) - - def has_wildcard(self, filename): - wildcards = ["*", "?"] - return bool(any(c in filename for c in wildcards)) - - def process_output_file(self, output_file): - """Puts the file to object storage. Handles wildcards and directories.""" - - matched_files = [output_file] - if self.has_wildcard(output_file): # explode the wildcarded file - matched_files = glob.glob(output_file) - - for matched_file in matched_files: - if os.path.isdir(matched_file): - for file in os.listdir(matched_file): - self.process_output_file(os.path.join(matched_file, file)) - else: - self.put_file_to_object_storage(matched_file) - - def convert_param_str_to_dict(self, pipeline_parameters: Optional[str] = None) -> Dict[str, Any]: - """Convert INOUT-separated string of pipeline parameters into a dictionary.""" - parameter_dict = {} - if pipeline_parameters: - parameter_list = pipeline_parameters.split(INOUT_SEPARATOR) - for parameter in parameter_list: - param_name, value = parameter.split("=", 1) - if self.parameter_pass_method == "env" and (not value or not isinstance(value, str)): - continue # env vars must be non-empty strings - parameter_dict[param_name] = value - return parameter_dict - - def set_parameters_in_env(self) -> None: - """Make pipeline parameters available as environment variables.""" - for name, value in self.pipeline_param_dict.items(): - if name in os.environ: - continue # avoid overwriting env vars with the same name - os.environ[name] = value - - -class NotebookFileOp(FileOpBase): - """Perform Notebook File Operation""" - - def execute(self) -> None: - """Execute the Notebook and upload results to object storage""" - notebook = os.path.basename(self.filepath) - notebook_name = notebook.replace(".ipynb", "") - notebook_output = f"{notebook_name}-output.ipynb" - notebook_html = f"{notebook_name}.html" - - try: - OpUtil.log_operation_info(f"executing notebook using 'papermill {notebook} {notebook_output}'") - t0 = time.time() - # Include kernel selection in execution time - kernel_name = NotebookFileOp.find_best_kernel(notebook) - - kwargs = {} - if self.parameter_pass_method == "env": - self.set_parameters_in_env() - - import papermill - - papermill.execute_notebook(notebook, notebook_output, kernel_name=kernel_name, **kwargs) - duration = time.time() - t0 - OpUtil.log_operation_info("notebook execution completed", duration) - - NotebookFileOp.convert_notebook_to_html(notebook_output, notebook_html) - self.put_file_to_object_storage(notebook_output, notebook) - self.put_file_to_object_storage(notebook_html) - self.process_outputs() - except Exception as ex: - # log in case of errors - logger.error(f"Unexpected error: {sys.exc_info()[0]}") - - NotebookFileOp.convert_notebook_to_html(notebook_output, notebook_html) - self.put_file_to_object_storage(notebook_output, notebook) - self.put_file_to_object_storage(notebook_html) - raise ex - - @staticmethod - def convert_notebook_to_html(notebook_file: str, html_file: str) -> str: - 
"""Function to convert a Jupyter notebook file (.ipynb) into an html file - - :param notebook_file: object storage client - :param html_file: name of what the html output file should be - :return: html_file: the converted notebook in html format - """ - import nbconvert - import nbformat - - OpUtil.log_operation_info(f"converting from {notebook_file} to {html_file}") - t0 = time.time() - nb = nbformat.read(notebook_file, as_version=4) - html_exporter = nbconvert.HTMLExporter() - data, resources = html_exporter.from_notebook_node(nb) - with open(html_file, "w") as f: - f.write(data) - f.close() - - duration = time.time() - t0 - OpUtil.log_operation_info(f"{notebook_file} converted to {html_file}", duration) - return html_file - - @staticmethod - def find_best_kernel(notebook_file: str) -> str: - """Determines the best kernel to use via the following algorithm: - - 1. Loads notebook and gets kernel_name and kernel_language from NB metadata. - 2. Gets the list of configured kernels using KernelSpecManager. - 3. If notebook kernel_name is in list, use that, else - 4. If not found, load each configured kernel.json file and find a language match. - 5. On first match, log info message regarding the switch and use that kernel. - 6. If no language match is found, revert to notebook kernel and log warning message. - """ - from jupyter_client.kernelspec import KernelSpecManager - import nbformat - - nb = nbformat.read(notebook_file, 4) - - nb_kspec = nb.metadata.kernelspec - nb_kernel_name = nb_kspec.get("name") - nb_kernel_lang = nb_kspec.get("language") - - kernel_specs = KernelSpecManager().find_kernel_specs() - - # see if we have a direct match... - if nb_kernel_name in kernel_specs.keys(): - return nb_kernel_name - - # no match found for kernel, try matching language... - for name, file in kernel_specs.items(): - # load file (JSON) and pick out language, if match, use first found - with open(os.path.join(file, "kernel.json")) as f: - kspec = json.load(f) - if kspec.get("language").lower() == nb_kernel_lang.lower(): - matched_kernel = os.path.basename(file) - logger.info( - f"Matched kernel by language ({nb_kernel_lang}), using kernel " - f"'{matched_kernel}' instead of the missing kernel '{nb_kernel_name}'." - ) - return matched_kernel - - # no match found for language, return notebook kernel and let execution fail - logger.warning( - f"Reverting back to missing notebook kernel '{nb_kernel_name}' since no " - f"language match ({nb_kernel_lang}) was found in current kernel specifications." 
- ) - return nb_kernel_name - - -class PythonFileOp(FileOpBase): - """Perform Python File Operation""" - - def execute(self) -> None: - """Execute the Python script and upload results to object storage""" - python_script = os.path.basename(self.filepath) - python_script_name = python_script.replace(".py", "") - # python_script_output = f"{python_script_name}.log" - - try: - OpUtil.log_operation_info( - f"executing python script using 'python3 {python_script}'" - ) - t0 = time.time() - - run_args = ["python3", python_script] - if self.parameter_pass_method == "env": - self.set_parameters_in_env() - - logger.info("----------------------Python logs start----------------------") - # Removing support for the s3 storage of python script logs - # with open(python_script_output, "w") as log_file: - # process = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - process = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - - for line in iter(process.stdout.readline, b''): - sys.stdout.write(line.decode()) - - process.stdout.close() - return_code = process.wait() - logger.info("----------------------Python logs ends----------------------") - if return_code: - raise subprocess.CalledProcessError(return_code, run_args) - duration = time.time() - t0 - OpUtil.log_operation_info("python script execution completed", duration) - - # self.put_file_to_object_storage(python_script_output, python_script_output) - self.process_outputs() - except Exception as ex: - # log in case of errors - logger.error(f"Unexpected error: {sys.exc_info()[0]}") - logger.error(f"Error details: {ex}") - - # self.put_file_to_object_storage(python_script_output, python_script_output) - raise ex - - -class RFileOp(FileOpBase): - """Perform R File Operation""" - - def execute(self) -> None: - """Execute the R script and upload results to object storage""" - r_script = os.path.basename(self.filepath) - r_script_name = r_script.replace(".r", "") - # r_script_output = f"{r_script_name}.log" - - try: - OpUtil.log_operation_info(f"executing R script using 'Rscript {r_script}'") - t0 = time.time() - - run_args = ["Rscript", r_script] - if self.parameter_pass_method == "env": - self.set_parameters_in_env() - - logger.info("----------------------R script logs start----------------------") - # Removing support for the s3 storage of R script logs - # with open(r_script_output, "w") as log_file: - # process = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - process = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - - for line in iter(process.stdout.readline, b''): - sys.stdout.write(line.decode()) - - process.stdout.close() - return_code = process.wait() - logger.info("----------------------R script logs ends----------------------") - if return_code: - raise subprocess.CalledProcessError(return_code, run_args) - - duration = time.time() - t0 - OpUtil.log_operation_info("R script execution completed", duration) - - # self.put_file_to_object_storage(r_script_output, r_script_output) - self.process_outputs() - except Exception as ex: - # log in case of errors - logger.error(f"Unexpected error: {sys.exc_info()[0]}") - logger.error(f"Error details: {ex}") - - # self.put_file_to_object_storage(r_script_output, r_script_output) - raise ex - - -class OpUtil(object): - """Utility functions for preparing file execution.""" - - @classmethod - def package_install(cls, user_volume_path) -> None: - OpUtil.log_operation_info("Installing packages") 
- t0 = time.time() - requirements_file = cls.determine_elyra_requirements() - elyra_packages = cls.package_list_to_dict(requirements_file) - current_packages = cls.package_list_to_dict("requirements-current.txt") - to_install_list = [] - - for package, ver in elyra_packages.items(): - if package in current_packages: - if current_packages[package] is None: - logger.warning( - f"WARNING: Source package '{package}' found already installed as an " - "editable package. This may conflict with the required version: " - f"{ver} . Skipping..." - ) - continue - try: - version.Version(current_packages[package]) - except version.InvalidVersion: # current version is not PEP-440 compliant - logger.warning( - f"WARNING: Source package '{package}' found already installed from " - f"{current_packages[package]}. This may conflict with the required " - f"version: {ver} . Skipping..." - ) - continue - if version.Version(ver) > version.Version(current_packages[package]): - logger.info(f"Updating {package} package from version {current_packages[package]} to {ver}...") - to_install_list.append(f"{package}=={ver}") - elif version.Version(ver) < version.Version(current_packages[package]): - logger.info( - f"Newer {package} package with version {current_packages[package]} " - f"already installed. Skipping..." - ) - else: - logger.info(f"Package not found. Installing {package} package with version {ver}...") - to_install_list.append(f"{package}=={ver}") - - if to_install_list: - if user_volume_path: - to_install_list.insert(0, f"--target={user_volume_path}") - to_install_list.append("--no-cache-dir") - - subprocess.run([sys.executable, "-m", "pip", "install"] + to_install_list, check=True) - - if user_volume_path: - os.environ["PIP_CONFIG_FILE"] = f"{user_volume_path}/pip.conf" - - subprocess.run([sys.executable, "-m", "pip", "freeze"]) - duration = time.time() - t0 - OpUtil.log_operation_info("Packages installed", duration) - - @classmethod - def determine_elyra_requirements(cls) -> Any: - if sys.version_info.major == 3: - if sys.version_info.minor in [8, 9, 10, 11]: - return "requirements-elyra.txt" - logger.error( - f"This version of Python '{sys.version_info.major}.{sys.version_info.minor}' " - f"is not supported for Elyra generic components" - ) - return None - - @classmethod - def package_list_to_dict(cls, filename: str) -> dict: - package_dict = {} - with open(filename) as fh: - for line in fh: - if line[0] != "#": - if " @ " in line: - package_name, package_version = line.strip("\n").split(sep=" @ ") - elif "===" in line: - package_name, package_version = line.strip("\n").split(sep="===") - elif "==" in line: - package_name, package_version = line.strip("\n").split(sep="==") - elif line.startswith("-e ") or line.startswith("--editable "): - package_name = line.strip("\n").replace("-e ", "").replace("--editable ", "") - if "#egg=" in package_name: # editable package from version control system - package_name = package_name.split("=")[-1] - elif "/" in package_name: # editable package from local directory - package_name = os.path.basename(package_name) - package_version = None - else: - # Tolerate other formats but do not add to package list - continue - - package_dict[package_name] = package_version - - return package_dict - - @classmethod - def parse_arguments(cls, args) -> dict: - import argparse - - global pipeline_name, operation_name - - logger.debug("Parsing Arguments.....") - parser = argparse.ArgumentParser() - parser.add_argument( - "-e", "--cos-endpoint", dest="cos-endpoint", help="Cloud object 
storage endpoint", required=True - ) - parser.add_argument( - "-b", "--cos-bucket", dest="cos-bucket", help="Cloud object storage bucket to use", required=True - ) - parser.add_argument( - "-d", - "--cos-directory", - dest="cos-directory", - help="Working directory in cloud object storage bucket to use", - required=True, - ) - parser.add_argument( - "-t", - "--cos-dependencies-archive", - dest="cos-dependencies-archive", - help="Archive containing notebook and dependency artifacts", - required=True, - ) - parser.add_argument("-f", "--file", dest="filepath", help="File to execute", required=True) - parser.add_argument("-o", "--outputs", dest="outputs", help="Files to output to object store", required=False) - parser.add_argument("-i", "--inputs", dest="inputs", help="Files to pull in from parent node", required=False) - parser.add_argument( - "-p", - "--user-volume-path", - dest="user-volume-path", - help="Directory in Volume to install python libraries into", - required=False, - ) - parser.add_argument( - "-n", - "--pipeline-name", - dest="pipeline-name", - help="Pipeline name", - required=True, - ) - parser.add_argument( - "-r", - "--pipeline-parameters", - dest="pipeline_parameters", - help="Pipeline parameters that apply to this node", - required=False, - ) - parser.add_argument( - "-m", - "--parameter-pass-method", - dest="parameter_pass_method", - choices=["env"], - help="The method by which pipeline parameters should be applied to this node.", - required=False, - ) - parsed_args = vars(parser.parse_args(args)) - - # set pipeline name as global - pipeline_name = parsed_args.get("pipeline-name") - # operation/node name is the basename of the non-suffixed filepath, set as global - operation_name = os.path.basename(os.path.splitext(parsed_args.get("filepath"))[0]) - - return parsed_args - - @classmethod - def log_operation_info(cls, action_clause: str, duration_secs: Optional[float] = None) -> None: - """Produces a formatted log INFO message used entirely for support purposes. - - This method is intended to be called for any entries that should be captured across aggregated - log files to identify steps within a given pipeline and each of its operations. As a result, - calls to this method should produce single-line entries in the log (no embedded newlines). - Each entry is prefixed with the pipeline name. - - General logging should NOT use this method but use logger.() statements directly. 
-
- :param action_clause: str representing the action that is being logged
- :param duration_secs: optional float value representing the duration of the action being logged
- """
- global pipeline_name, operation_name
- if enable_pipeline_info:
- duration_clause = f"({duration_secs:.3f} secs)" if duration_secs else ""
- logger.info(f"'{pipeline_name}':'{operation_name}' - {action_clause} {duration_clause}")
-
-
-def main():
- # Configure logger format, level
- logging.basicConfig(
- format="[%(levelname)1.1s %(asctime)s.%(msecs).03d] %(message)s", datefmt="%H:%M:%S", level=logging.DEBUG
- )
- # Setup packages and gather arguments
- input_params = OpUtil.parse_arguments(sys.argv[1:])
- OpUtil.log_operation_info("starting operation")
- t0 = time.time()
-
- # Create the appropriate instance, process dependencies and execute the operation
- file_op = FileOpBase.get_instance(**input_params)
-
- file_op.process_dependencies()
-
- file_op.execute()
-
- # Process notebook | script metrics and KFP UI metadata
- file_op.process_metrics_and_metadata()
-
- duration = time.time() - t0
- OpUtil.log_operation_info("operation completed", duration)
-
-
-if __name__ == "__main__":
- main()
\ No newline at end of file
diff --git a/runtimes/pytorch/ubi9-python-3.12/Dockerfile.cuda b/runtimes/pytorch/ubi9-python-3.12/Dockerfile.cuda
index 5a79faea93..8a00819859 100644
--- a/runtimes/pytorch/ubi9-python-3.12/Dockerfile.cuda
+++ b/runtimes/pytorch/ubi9-python-3.12/Dockerfile.cuda
@@ -68,6 +68,9 @@ WORKDIR /opt/app-root/bin
COPY ${PYTORCH_SOURCE_CODE}/pylock.toml ./
# Copy Elyra dependencies for air-gapped enviroment
COPY ${PYTORCH_SOURCE_CODE}/utils ./utils/
+# Download Elyra bootstrapper.py
+RUN curl -fL https://raw.githubusercontent.com/opendatahub-io/elyra/refs/heads/main/elyra/kfp/bootstrapper.py \
+ -o ./utils/bootstrapper.py
RUN echo "Installing softwares and packages" && \
# This may have to download and compile some dependencies, and as we don't lock requirements from `build-system.requires`,
diff --git a/runtimes/pytorch/ubi9-python-3.12/utils/bootstrapper.py b/runtimes/pytorch/ubi9-python-3.12/utils/bootstrapper.py
deleted file mode 100644
index 8009048682..0000000000
--- a/runtimes/pytorch/ubi9-python-3.12/utils/bootstrapper.py
+++ /dev/null
@@ -1,769 +0,0 @@
-# Copied from: https://github.com/elyra-ai/elyra/blob/main/elyra/kfp/bootstrapper.py
-#
-# Copyright 2018-2023 Elyra Authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-from abc import ABC
-from abc import abstractmethod
-import glob
-import json
-import logging
-import os
-from pathlib import Path
-import subprocess
-import sys
-from tempfile import TemporaryFile
-import time
-from typing import Any
-from typing import Dict
-from typing import Optional
-from typing import Type
-from typing import TypeVar
-from urllib.parse import urljoin
-from urllib.parse import urlparse
-from urllib.parse import urlunparse
-
-from packaging import version
-
-
-# Inputs and Outputs separator character.
If updated, -# same-named variable in _notebook_op.py must be updated! -INOUT_SEPARATOR = ";" - -# Setup forward reference for type hint on return from class factory method. See -# https://stackoverflow.com/questions/39205527/can-you-annotate-return-type-when-value-is-instance-of-cls/39205612#39205612 -F = TypeVar("F", bound="FileOpBase") - -logger = logging.getLogger("elyra") -enable_pipeline_info = os.getenv("ELYRA_ENABLE_PIPELINE_INFO", "true").lower() == "true" -pipeline_name = None # global used in formatted logging -operation_name = None # global used in formatted logging - - -class FileOpBase(ABC): - """Abstract base class for file-based operations""" - - filepath = None - cos_client = None - cos_bucket = None - - @classmethod - def get_instance(cls: Type[F], **kwargs: Any) -> F: - """Creates an appropriate subclass instance based on the extension of the filepath (-f) argument""" - filepath = kwargs["filepath"] - if ".ipynb" in filepath: - return NotebookFileOp(**kwargs) - elif ".py" in filepath: - return PythonFileOp(**kwargs) - elif ".r" in filepath: - return RFileOp(**kwargs) - else: - raise ValueError(f"Unsupported file type: {filepath}") - - def __init__(self, **kwargs: Any) -> None: - """Initializes the FileOpBase instance""" - import minio - from minio.credentials import providers - - self.filepath = kwargs["filepath"] - self.input_params = kwargs or {} - self.cos_endpoint = urlparse(self.input_params.get("cos-endpoint")) - self.cos_bucket = self.input_params.get("cos-bucket") - - self.parameter_pass_method = self.input_params.get("parameter_pass_method") - self.pipeline_param_dict = self.convert_param_str_to_dict(self.input_params.get("pipeline_parameters")) - - # Infer secure from the endpoint's scheme. - self.secure = self.cos_endpoint.scheme == "https" - - # get minio credentials provider - if "cos-user" in self.input_params and "cos-password" in self.input_params: - cred_provider = providers.StaticProvider( - access_key=self.input_params.get("cos-user"), - secret_key=self.input_params.get("cos-password"), - ) - elif "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ: - cred_provider = providers.EnvAWSProvider() - elif "AWS_ROLE_ARN" in os.environ and "AWS_WEB_IDENTITY_TOKEN_FILE" in os.environ: - cred_provider = providers.IamAwsProvider() - else: - raise RuntimeError( - "No minio credentials provider can be initialised for current configs. " - "Please validate your runtime configuration details and retry." - ) - - # get minio client - self.cos_client = minio.Minio(self.cos_endpoint.netloc, secure=self.secure, credentials=cred_provider) - - @abstractmethod - def execute(self) -> None: - """Execute the operation relative to derived class""" - raise NotImplementedError("Method 'execute()' must be implemented by subclasses!") - - def process_dependencies(self) -> None: - """Process dependencies - - If a dependency archive is present, it will be downloaded from object storage - and expanded into the local directory. - - This method can be overridden by subclasses, although overrides should first - call the superclass method. 
- """ - OpUtil.log_operation_info("processing dependencies") - t0 = time.time() - archive_file = self.input_params.get("cos-dependencies-archive") - - self.get_file_from_object_storage(archive_file) - - inputs = self.input_params.get("inputs") - if inputs: - input_list = inputs.split(INOUT_SEPARATOR) - for file in input_list: - self.get_file_from_object_storage(file.strip()) - - subprocess.call(["tar", "-zxvf", archive_file]) - duration = time.time() - t0 - OpUtil.log_operation_info("dependencies processed", duration) - - def process_outputs(self) -> None: - """Process outputs - - If outputs have been specified, it will upload the appropriate files to object storage - - This method can be overridden by subclasses, although overrides should first - call the superclass method. - """ - OpUtil.log_operation_info("processing outputs") - t0 = time.time() - outputs = self.input_params.get("outputs") - if outputs: - output_list = outputs.split(INOUT_SEPARATOR) - for file in output_list: - self.process_output_file(file.strip()) - duration = time.time() - t0 - OpUtil.log_operation_info("outputs processed", duration) - - def process_metrics_and_metadata(self) -> None: - """Process metrics and metadata - - This method exposes metrics/metadata that the processed - notebook | script produces in the KFP UI. - - This method should not be overridden by subclasses. - """ - - OpUtil.log_operation_info("processing metrics and metadata") - t0 = time.time() - - # Location where the KFP specific output files will be stored - # in the environment where the bootsrapper is running. - # Defaults to '/tmp' if not specified. - output_path = Path(os.getenv("ELYRA_WRITABLE_CONTAINER_DIR", "/tmp")) - - # verify that output_path exists, is a directory - # and writable by creating a temporary file in that location - try: - with TemporaryFile(mode="w", dir=output_path) as t: - t.write("can write") - except Exception: - # output_path doesn't meet the requirements - # treat this as a non-fatal error and log a warning - logger.warning(f'Cannot create files in "{output_path}".') - OpUtil.log_operation_info("Aborted metrics and metadata processing", time.time() - t0) - return - - # Name of the proprietary KFP UI metadata file. - # Notebooks | scripts might (but don't have to) produce this file - # as documented in - # https://www.kubeflow.org/docs/pipelines/sdk/output-viewer/ - # Each ExecuteFileOp must declare this as an output file or - # the KFP UI won't pick up the information. - kfp_ui_metadata_filename = "mlpipeline-ui-metadata.json" - - # Name of the proprietary KFP metadata file. - # Notebooks | scripts might (but don't have to) produce this file - # as documented in - # https://www.kubeflow.org/docs/pipelines/sdk/pipelines-metrics/ - # Each ExecuteFileOp must declare this as an output file or - # the KFP UI won't pick up the information. - kfp_metrics_filename = "mlpipeline-metrics.json" - - # If the notebook | Python script produced one of the files - # copy it to the target location where KFP is looking for it. 
- for filename in [kfp_ui_metadata_filename, kfp_metrics_filename]: - try: - src = Path(".") / filename - logger.debug(f"Processing {src} ...") - # try to load the file, if one was created by the - # notebook or script - with open(src, "r") as f: - metadata = json.load(f) - - # the file exists and contains valid JSON - logger.debug(f"File content: {json.dumps(metadata)}") - - target = output_path / filename - # try to save the file in the destination location - with open(target, "w") as f: - json.dump(metadata, f) - except FileNotFoundError: - # The script | notebook didn't produce the file - # we are looking for. This is not an error condition - # that needs to be handled. - logger.debug(f"{self.filepath} produced no file named {src}") - except ValueError as ve: - # The file content could not be parsed. Log a warning - # and treat this as a non-fatal error. - logger.warning(f"Ignoring incompatible {str(src)} produced by {self.filepath}: {ve} {str(ve)}") - except Exception as ex: - # Something is wrong with the user-generated metadata file. - # Log a warning and treat this as a non-fatal error. - logger.warning(f"Error processing {str(src)} produced by {self.filepath}: {ex} {str(ex)}") - - # - # Augment kfp_ui_metadata_filename with Elyra-specific information: - # - link to object storage where input and output artifacts are - # stored - ui_metadata_output = output_path / kfp_ui_metadata_filename - try: - # re-load the file - with open(ui_metadata_output, "r") as f: - metadata = json.load(f) - except Exception: - # ignore all errors - metadata = {} - - # Assure the 'output' property exists and is of the correct type - if metadata.get("outputs", None) is None or not isinstance(metadata["outputs"], list): - metadata["outputs"] = [] - - # Define HREF for COS bucket: - # // - bucket_url = urljoin( - urlunparse(self.cos_endpoint), f"{self.cos_bucket}/{self.input_params.get('cos-directory', '')}/" - ) - - # add Elyra metadata to 'outputs' - metadata["outputs"].append( - { - "storage": "inline", - "source": f"## Inputs for {self.filepath}\n" - f"[{self.input_params['cos-dependencies-archive']}]({bucket_url})", - "type": "markdown", - } - ) - - # print the content of the augmented metadata file - logger.debug(f"Output UI metadata: {json.dumps(metadata)}") - - logger.debug(f"Saving UI metadata file as {ui_metadata_output} ...") - - # Save [updated] KFP UI metadata file - with open(ui_metadata_output, "w") as f: - json.dump(metadata, f) - - duration = time.time() - t0 - OpUtil.log_operation_info("metrics and metadata processed", duration) - - def get_object_storage_filename(self, filename: str) -> str: - """Function to pre-pend cloud storage working dir to file name - - :param filename: the local file - :return: the full path of the object storage file - """ - return os.path.join(self.input_params.get("cos-directory", ""), filename) - - def get_file_from_object_storage(self, file_to_get: str) -> None: - """Utility function to get files from an object storage - - :param file_to_get: filename - """ - - object_to_get = self.get_object_storage_filename(file_to_get) - t0 = time.time() - self.cos_client.fget_object(bucket_name=self.cos_bucket, object_name=object_to_get, file_path=file_to_get) - duration = time.time() - t0 - OpUtil.log_operation_info( - f"downloaded {file_to_get} from bucket: {self.cos_bucket}, object: {object_to_get}", duration - ) - - def put_file_to_object_storage(self, file_to_upload: str, object_name: Optional[str] = None) -> None: - """Utility function to put files into an object 
storage - - :param file_to_upload: filename - :param object_name: remote filename (used to rename) - """ - - object_to_upload = object_name - if not object_to_upload: - object_to_upload = file_to_upload - - object_to_upload = self.get_object_storage_filename(object_to_upload) - t0 = time.time() - self.cos_client.fput_object(bucket_name=self.cos_bucket, object_name=object_to_upload, file_path=file_to_upload) - duration = time.time() - t0 - OpUtil.log_operation_info( - f"uploaded {file_to_upload} to bucket: {self.cos_bucket} object: {object_to_upload}", duration - ) - - def has_wildcard(self, filename): - wildcards = ["*", "?"] - return bool(any(c in filename for c in wildcards)) - - def process_output_file(self, output_file): - """Puts the file to object storage. Handles wildcards and directories.""" - - matched_files = [output_file] - if self.has_wildcard(output_file): # explode the wildcarded file - matched_files = glob.glob(output_file) - - for matched_file in matched_files: - if os.path.isdir(matched_file): - for file in os.listdir(matched_file): - self.process_output_file(os.path.join(matched_file, file)) - else: - self.put_file_to_object_storage(matched_file) - - def convert_param_str_to_dict(self, pipeline_parameters: Optional[str] = None) -> Dict[str, Any]: - """Convert INOUT-separated string of pipeline parameters into a dictionary.""" - parameter_dict = {} - if pipeline_parameters: - parameter_list = pipeline_parameters.split(INOUT_SEPARATOR) - for parameter in parameter_list: - param_name, value = parameter.split("=", 1) - if self.parameter_pass_method == "env" and (not value or not isinstance(value, str)): - continue # env vars must be non-empty strings - parameter_dict[param_name] = value - return parameter_dict - - def set_parameters_in_env(self) -> None: - """Make pipeline parameters available as environment variables.""" - for name, value in self.pipeline_param_dict.items(): - if name in os.environ: - continue # avoid overwriting env vars with the same name - os.environ[name] = value - - -class NotebookFileOp(FileOpBase): - """Perform Notebook File Operation""" - - def execute(self) -> None: - """Execute the Notebook and upload results to object storage""" - notebook = os.path.basename(self.filepath) - notebook_name = notebook.replace(".ipynb", "") - notebook_output = f"{notebook_name}-output.ipynb" - notebook_html = f"{notebook_name}.html" - - try: - OpUtil.log_operation_info(f"executing notebook using 'papermill {notebook} {notebook_output}'") - t0 = time.time() - # Include kernel selection in execution time - kernel_name = NotebookFileOp.find_best_kernel(notebook) - - kwargs = {} - if self.parameter_pass_method == "env": - self.set_parameters_in_env() - - import papermill - - papermill.execute_notebook(notebook, notebook_output, kernel_name=kernel_name, **kwargs) - duration = time.time() - t0 - OpUtil.log_operation_info("notebook execution completed", duration) - - NotebookFileOp.convert_notebook_to_html(notebook_output, notebook_html) - self.put_file_to_object_storage(notebook_output, notebook) - self.put_file_to_object_storage(notebook_html) - self.process_outputs() - except Exception as ex: - # log in case of errors - logger.error(f"Unexpected error: {sys.exc_info()[0]}") - - NotebookFileOp.convert_notebook_to_html(notebook_output, notebook_html) - self.put_file_to_object_storage(notebook_output, notebook) - self.put_file_to_object_storage(notebook_html) - raise ex - - @staticmethod - def convert_notebook_to_html(notebook_file: str, html_file: str) -> str: - 
"""Function to convert a Jupyter notebook file (.ipynb) into an html file - - :param notebook_file: object storage client - :param html_file: name of what the html output file should be - :return: html_file: the converted notebook in html format - """ - import nbconvert - import nbformat - - OpUtil.log_operation_info(f"converting from {notebook_file} to {html_file}") - t0 = time.time() - nb = nbformat.read(notebook_file, as_version=4) - html_exporter = nbconvert.HTMLExporter() - data, resources = html_exporter.from_notebook_node(nb) - with open(html_file, "w") as f: - f.write(data) - f.close() - - duration = time.time() - t0 - OpUtil.log_operation_info(f"{notebook_file} converted to {html_file}", duration) - return html_file - - @staticmethod - def find_best_kernel(notebook_file: str) -> str: - """Determines the best kernel to use via the following algorithm: - - 1. Loads notebook and gets kernel_name and kernel_language from NB metadata. - 2. Gets the list of configured kernels using KernelSpecManager. - 3. If notebook kernel_name is in list, use that, else - 4. If not found, load each configured kernel.json file and find a language match. - 5. On first match, log info message regarding the switch and use that kernel. - 6. If no language match is found, revert to notebook kernel and log warning message. - """ - from jupyter_client.kernelspec import KernelSpecManager - import nbformat - - nb = nbformat.read(notebook_file, 4) - - nb_kspec = nb.metadata.kernelspec - nb_kernel_name = nb_kspec.get("name") - nb_kernel_lang = nb_kspec.get("language") - - kernel_specs = KernelSpecManager().find_kernel_specs() - - # see if we have a direct match... - if nb_kernel_name in kernel_specs.keys(): - return nb_kernel_name - - # no match found for kernel, try matching language... - for name, file in kernel_specs.items(): - # load file (JSON) and pick out language, if match, use first found - with open(os.path.join(file, "kernel.json")) as f: - kspec = json.load(f) - if kspec.get("language").lower() == nb_kernel_lang.lower(): - matched_kernel = os.path.basename(file) - logger.info( - f"Matched kernel by language ({nb_kernel_lang}), using kernel " - f"'{matched_kernel}' instead of the missing kernel '{nb_kernel_name}'." - ) - return matched_kernel - - # no match found for language, return notebook kernel and let execution fail - logger.warning( - f"Reverting back to missing notebook kernel '{nb_kernel_name}' since no " - f"language match ({nb_kernel_lang}) was found in current kernel specifications." 
- ) - return nb_kernel_name - - -class PythonFileOp(FileOpBase): - """Perform Python File Operation""" - - def execute(self) -> None: - """Execute the Python script and upload results to object storage""" - python_script = os.path.basename(self.filepath) - python_script_name = python_script.replace(".py", "") - # python_script_output = f"{python_script_name}.log" - - try: - OpUtil.log_operation_info( - f"executing python script using 'python3 {python_script}'" - ) - t0 = time.time() - - run_args = ["python3", python_script] - if self.parameter_pass_method == "env": - self.set_parameters_in_env() - - logger.info("----------------------Python logs start----------------------") - # Removing support for the s3 storage of python script logs - # with open(python_script_output, "w") as log_file: - # process = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - process = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - - for line in iter(process.stdout.readline, b''): - sys.stdout.write(line.decode()) - - process.stdout.close() - return_code = process.wait() - logger.info("----------------------Python logs ends----------------------") - if return_code: - raise subprocess.CalledProcessError(return_code, run_args) - duration = time.time() - t0 - OpUtil.log_operation_info("python script execution completed", duration) - - # self.put_file_to_object_storage(python_script_output, python_script_output) - self.process_outputs() - except Exception as ex: - # log in case of errors - logger.error(f"Unexpected error: {sys.exc_info()[0]}") - logger.error(f"Error details: {ex}") - - # self.put_file_to_object_storage(python_script_output, python_script_output) - raise ex - - -class RFileOp(FileOpBase): - """Perform R File Operation""" - - def execute(self) -> None: - """Execute the R script and upload results to object storage""" - r_script = os.path.basename(self.filepath) - r_script_name = r_script.replace(".r", "") - # r_script_output = f"{r_script_name}.log" - - try: - OpUtil.log_operation_info(f"executing R script using 'Rscript {r_script}'") - t0 = time.time() - - run_args = ["Rscript", r_script] - if self.parameter_pass_method == "env": - self.set_parameters_in_env() - - logger.info("----------------------R script logs start----------------------") - # Removing support for the s3 storage of R script logs - # with open(r_script_output, "w") as log_file: - # process = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - process = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - - for line in iter(process.stdout.readline, b''): - sys.stdout.write(line.decode()) - - process.stdout.close() - return_code = process.wait() - logger.info("----------------------R script logs ends----------------------") - if return_code: - raise subprocess.CalledProcessError(return_code, run_args) - - duration = time.time() - t0 - OpUtil.log_operation_info("R script execution completed", duration) - - # self.put_file_to_object_storage(r_script_output, r_script_output) - self.process_outputs() - except Exception as ex: - # log in case of errors - logger.error(f"Unexpected error: {sys.exc_info()[0]}") - logger.error(f"Error details: {ex}") - - # self.put_file_to_object_storage(r_script_output, r_script_output) - raise ex - - -class OpUtil(object): - """Utility functions for preparing file execution.""" - - @classmethod - def package_install(cls, user_volume_path) -> None: - OpUtil.log_operation_info("Installing packages") 
- t0 = time.time() - requirements_file = cls.determine_elyra_requirements() - elyra_packages = cls.package_list_to_dict(requirements_file) - current_packages = cls.package_list_to_dict("requirements-current.txt") - to_install_list = [] - - for package, ver in elyra_packages.items(): - if package in current_packages: - if current_packages[package] is None: - logger.warning( - f"WARNING: Source package '{package}' found already installed as an " - "editable package. This may conflict with the required version: " - f"{ver} . Skipping..." - ) - continue - try: - version.Version(current_packages[package]) - except version.InvalidVersion: # current version is not PEP-440 compliant - logger.warning( - f"WARNING: Source package '{package}' found already installed from " - f"{current_packages[package]}. This may conflict with the required " - f"version: {ver} . Skipping..." - ) - continue - if version.Version(ver) > version.Version(current_packages[package]): - logger.info(f"Updating {package} package from version {current_packages[package]} to {ver}...") - to_install_list.append(f"{package}=={ver}") - elif version.Version(ver) < version.Version(current_packages[package]): - logger.info( - f"Newer {package} package with version {current_packages[package]} " - f"already installed. Skipping..." - ) - else: - logger.info(f"Package not found. Installing {package} package with version {ver}...") - to_install_list.append(f"{package}=={ver}") - - if to_install_list: - if user_volume_path: - to_install_list.insert(0, f"--target={user_volume_path}") - to_install_list.append("--no-cache-dir") - - subprocess.run([sys.executable, "-m", "pip", "install"] + to_install_list, check=True) - - if user_volume_path: - os.environ["PIP_CONFIG_FILE"] = f"{user_volume_path}/pip.conf" - - subprocess.run([sys.executable, "-m", "pip", "freeze"]) - duration = time.time() - t0 - OpUtil.log_operation_info("Packages installed", duration) - - @classmethod - def determine_elyra_requirements(cls) -> Any: - if sys.version_info.major == 3: - if sys.version_info.minor in [8, 9, 10, 11]: - return "requirements-elyra.txt" - logger.error( - f"This version of Python '{sys.version_info.major}.{sys.version_info.minor}' " - f"is not supported for Elyra generic components" - ) - return None - - @classmethod - def package_list_to_dict(cls, filename: str) -> dict: - package_dict = {} - with open(filename) as fh: - for line in fh: - if line[0] != "#": - if " @ " in line: - package_name, package_version = line.strip("\n").split(sep=" @ ") - elif "===" in line: - package_name, package_version = line.strip("\n").split(sep="===") - elif "==" in line: - package_name, package_version = line.strip("\n").split(sep="==") - elif line.startswith("-e ") or line.startswith("--editable "): - package_name = line.strip("\n").replace("-e ", "").replace("--editable ", "") - if "#egg=" in package_name: # editable package from version control system - package_name = package_name.split("=")[-1] - elif "/" in package_name: # editable package from local directory - package_name = os.path.basename(package_name) - package_version = None - else: - # Tolerate other formats but do not add to package list - continue - - package_dict[package_name] = package_version - - return package_dict - - @classmethod - def parse_arguments(cls, args) -> dict: - import argparse - - global pipeline_name, operation_name - - logger.debug("Parsing Arguments.....") - parser = argparse.ArgumentParser() - parser.add_argument( - "-e", "--cos-endpoint", dest="cos-endpoint", help="Cloud object 
storage endpoint", required=True - ) - parser.add_argument( - "-b", "--cos-bucket", dest="cos-bucket", help="Cloud object storage bucket to use", required=True - ) - parser.add_argument( - "-d", - "--cos-directory", - dest="cos-directory", - help="Working directory in cloud object storage bucket to use", - required=True, - ) - parser.add_argument( - "-t", - "--cos-dependencies-archive", - dest="cos-dependencies-archive", - help="Archive containing notebook and dependency artifacts", - required=True, - ) - parser.add_argument("-f", "--file", dest="filepath", help="File to execute", required=True) - parser.add_argument("-o", "--outputs", dest="outputs", help="Files to output to object store", required=False) - parser.add_argument("-i", "--inputs", dest="inputs", help="Files to pull in from parent node", required=False) - parser.add_argument( - "-p", - "--user-volume-path", - dest="user-volume-path", - help="Directory in Volume to install python libraries into", - required=False, - ) - parser.add_argument( - "-n", - "--pipeline-name", - dest="pipeline-name", - help="Pipeline name", - required=True, - ) - parser.add_argument( - "-r", - "--pipeline-parameters", - dest="pipeline_parameters", - help="Pipeline parameters that apply to this node", - required=False, - ) - parser.add_argument( - "-m", - "--parameter-pass-method", - dest="parameter_pass_method", - choices=["env"], - help="The method by which pipeline parameters should be applied to this node.", - required=False, - ) - parsed_args = vars(parser.parse_args(args)) - - # set pipeline name as global - pipeline_name = parsed_args.get("pipeline-name") - # operation/node name is the basename of the non-suffixed filepath, set as global - operation_name = os.path.basename(os.path.splitext(parsed_args.get("filepath"))[0]) - - return parsed_args - - @classmethod - def log_operation_info(cls, action_clause: str, duration_secs: Optional[float] = None) -> None: - """Produces a formatted log INFO message used entirely for support purposes. - - This method is intended to be called for any entries that should be captured across aggregated - log files to identify steps within a given pipeline and each of its operations. As a result, - calls to this method should produce single-line entries in the log (no embedded newlines). - Each entry is prefixed with the pipeline name. - - General logging should NOT use this method but use logger.() statements directly. 
- - :param action_clause: str representing the action that is being logged - :param duration_secs: optional float value representing the duration of the action being logged - """ - global pipeline_name, operation_name - if enable_pipeline_info: - duration_clause = f"({duration_secs:.3f} secs)" if duration_secs else "" - logger.info(f"'{pipeline_name}':'{operation_name}' - {action_clause} {duration_clause}") - - -def main(): - # Configure logger format, level - logging.basicConfig( - format="[%(levelname)1.1s %(asctime)s.%(msecs).03d] %(message)s", datefmt="%H:%M:%S", level=logging.DEBUG - ) - # Setup packages and gather arguments - input_params = OpUtil.parse_arguments(sys.argv[1:]) - OpUtil.log_operation_info("starting operation") - t0 = time.time() - - # Create the appropriate instance, process dependencies and execute the operation - file_op = FileOpBase.get_instance(**input_params) - - file_op.process_dependencies() - - file_op.execute() - - # Process notebook | script metrics and KFP UI metadata - file_op.process_metrics_and_metadata() - - duration = time.time() - t0 - OpUtil.log_operation_info("operation completed", duration) - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/runtimes/rocm-pytorch/ubi9-python-3.12/Dockerfile.rocm b/runtimes/rocm-pytorch/ubi9-python-3.12/Dockerfile.rocm index d14a8b87cc..c1ace8a85d 100644 --- a/runtimes/rocm-pytorch/ubi9-python-3.12/Dockerfile.rocm +++ b/runtimes/rocm-pytorch/ubi9-python-3.12/Dockerfile.rocm @@ -69,6 +69,10 @@ COPY ${PYTORCH_SOURCE_CODE}/utils ./utils/ # Copy utility script COPY ${PYTORCH_SOURCE_CODE}/de-vendor-torch.sh ./ +# Download Elyra boostrapper.py +RUN curl -fL https://raw.githubusercontent.com/opendatahub-io/elyra/refs/heads/main/elyra/kfp/bootstrapper.py \ + -o ./utils/bootstrapper.py + RUN echo "Installing softwares and packages" && \ # This may have to download and compile some dependencies, and as we don't lock requirements from `build-system.requires`, # we often don't know the correct hashes and `--require-hashes` would therefore fail on non amd64, where building is common. diff --git a/runtimes/rocm-pytorch/ubi9-python-3.12/utils/bootstrapper.py b/runtimes/rocm-pytorch/ubi9-python-3.12/utils/bootstrapper.py deleted file mode 100644 index 8009048682..0000000000 --- a/runtimes/rocm-pytorch/ubi9-python-3.12/utils/bootstrapper.py +++ /dev/null @@ -1,769 +0,0 @@ -# Copied from: https://github.com/elyra-ai/elyra/blob/main/elyra/kfp/bootstrapper.py -# -# Copyright 2018-2023 Elyra Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -from abc import ABC -from abc import abstractmethod -import glob -import json -import logging -import os -from pathlib import Path -import subprocess -import sys -from tempfile import TemporaryFile -import time -from typing import Any -from typing import Dict -from typing import Optional -from typing import Type -from typing import TypeVar -from urllib.parse import urljoin -from urllib.parse import urlparse -from urllib.parse import urlunparse - -from packaging import version - - -# Inputs and Outputs separator character. If updated, -# same-named variable in _notebook_op.py must be updated! -INOUT_SEPARATOR = ";" - -# Setup forward reference for type hint on return from class factory method. See -# https://stackoverflow.com/questions/39205527/can-you-annotate-return-type-when-value-is-instance-of-cls/39205612#39205612 -F = TypeVar("F", bound="FileOpBase") - -logger = logging.getLogger("elyra") -enable_pipeline_info = os.getenv("ELYRA_ENABLE_PIPELINE_INFO", "true").lower() == "true" -pipeline_name = None # global used in formatted logging -operation_name = None # global used in formatted logging - - -class FileOpBase(ABC): - """Abstract base class for file-based operations""" - - filepath = None - cos_client = None - cos_bucket = None - - @classmethod - def get_instance(cls: Type[F], **kwargs: Any) -> F: - """Creates an appropriate subclass instance based on the extension of the filepath (-f) argument""" - filepath = kwargs["filepath"] - if ".ipynb" in filepath: - return NotebookFileOp(**kwargs) - elif ".py" in filepath: - return PythonFileOp(**kwargs) - elif ".r" in filepath: - return RFileOp(**kwargs) - else: - raise ValueError(f"Unsupported file type: {filepath}") - - def __init__(self, **kwargs: Any) -> None: - """Initializes the FileOpBase instance""" - import minio - from minio.credentials import providers - - self.filepath = kwargs["filepath"] - self.input_params = kwargs or {} - self.cos_endpoint = urlparse(self.input_params.get("cos-endpoint")) - self.cos_bucket = self.input_params.get("cos-bucket") - - self.parameter_pass_method = self.input_params.get("parameter_pass_method") - self.pipeline_param_dict = self.convert_param_str_to_dict(self.input_params.get("pipeline_parameters")) - - # Infer secure from the endpoint's scheme. - self.secure = self.cos_endpoint.scheme == "https" - - # get minio credentials provider - if "cos-user" in self.input_params and "cos-password" in self.input_params: - cred_provider = providers.StaticProvider( - access_key=self.input_params.get("cos-user"), - secret_key=self.input_params.get("cos-password"), - ) - elif "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ: - cred_provider = providers.EnvAWSProvider() - elif "AWS_ROLE_ARN" in os.environ and "AWS_WEB_IDENTITY_TOKEN_FILE" in os.environ: - cred_provider = providers.IamAwsProvider() - else: - raise RuntimeError( - "No minio credentials provider can be initialised for current configs. " - "Please validate your runtime configuration details and retry." - ) - - # get minio client - self.cos_client = minio.Minio(self.cos_endpoint.netloc, secure=self.secure, credentials=cred_provider) - - @abstractmethod - def execute(self) -> None: - """Execute the operation relative to derived class""" - raise NotImplementedError("Method 'execute()' must be implemented by subclasses!") - - def process_dependencies(self) -> None: - """Process dependencies - - If a dependency archive is present, it will be downloaded from object storage - and expanded into the local directory. 
- - This method can be overridden by subclasses, although overrides should first - call the superclass method. - """ - OpUtil.log_operation_info("processing dependencies") - t0 = time.time() - archive_file = self.input_params.get("cos-dependencies-archive") - - self.get_file_from_object_storage(archive_file) - - inputs = self.input_params.get("inputs") - if inputs: - input_list = inputs.split(INOUT_SEPARATOR) - for file in input_list: - self.get_file_from_object_storage(file.strip()) - - subprocess.call(["tar", "-zxvf", archive_file]) - duration = time.time() - t0 - OpUtil.log_operation_info("dependencies processed", duration) - - def process_outputs(self) -> None: - """Process outputs - - If outputs have been specified, it will upload the appropriate files to object storage - - This method can be overridden by subclasses, although overrides should first - call the superclass method. - """ - OpUtil.log_operation_info("processing outputs") - t0 = time.time() - outputs = self.input_params.get("outputs") - if outputs: - output_list = outputs.split(INOUT_SEPARATOR) - for file in output_list: - self.process_output_file(file.strip()) - duration = time.time() - t0 - OpUtil.log_operation_info("outputs processed", duration) - - def process_metrics_and_metadata(self) -> None: - """Process metrics and metadata - - This method exposes metrics/metadata that the processed - notebook | script produces in the KFP UI. - - This method should not be overridden by subclasses. - """ - - OpUtil.log_operation_info("processing metrics and metadata") - t0 = time.time() - - # Location where the KFP specific output files will be stored - # in the environment where the bootsrapper is running. - # Defaults to '/tmp' if not specified. - output_path = Path(os.getenv("ELYRA_WRITABLE_CONTAINER_DIR", "/tmp")) - - # verify that output_path exists, is a directory - # and writable by creating a temporary file in that location - try: - with TemporaryFile(mode="w", dir=output_path) as t: - t.write("can write") - except Exception: - # output_path doesn't meet the requirements - # treat this as a non-fatal error and log a warning - logger.warning(f'Cannot create files in "{output_path}".') - OpUtil.log_operation_info("Aborted metrics and metadata processing", time.time() - t0) - return - - # Name of the proprietary KFP UI metadata file. - # Notebooks | scripts might (but don't have to) produce this file - # as documented in - # https://www.kubeflow.org/docs/pipelines/sdk/output-viewer/ - # Each ExecuteFileOp must declare this as an output file or - # the KFP UI won't pick up the information. - kfp_ui_metadata_filename = "mlpipeline-ui-metadata.json" - - # Name of the proprietary KFP metadata file. - # Notebooks | scripts might (but don't have to) produce this file - # as documented in - # https://www.kubeflow.org/docs/pipelines/sdk/pipelines-metrics/ - # Each ExecuteFileOp must declare this as an output file or - # the KFP UI won't pick up the information. - kfp_metrics_filename = "mlpipeline-metrics.json" - - # If the notebook | Python script produced one of the files - # copy it to the target location where KFP is looking for it. 
- for filename in [kfp_ui_metadata_filename, kfp_metrics_filename]: - try: - src = Path(".") / filename - logger.debug(f"Processing {src} ...") - # try to load the file, if one was created by the - # notebook or script - with open(src, "r") as f: - metadata = json.load(f) - - # the file exists and contains valid JSON - logger.debug(f"File content: {json.dumps(metadata)}") - - target = output_path / filename - # try to save the file in the destination location - with open(target, "w") as f: - json.dump(metadata, f) - except FileNotFoundError: - # The script | notebook didn't produce the file - # we are looking for. This is not an error condition - # that needs to be handled. - logger.debug(f"{self.filepath} produced no file named {src}") - except ValueError as ve: - # The file content could not be parsed. Log a warning - # and treat this as a non-fatal error. - logger.warning(f"Ignoring incompatible {str(src)} produced by {self.filepath}: {ve} {str(ve)}") - except Exception as ex: - # Something is wrong with the user-generated metadata file. - # Log a warning and treat this as a non-fatal error. - logger.warning(f"Error processing {str(src)} produced by {self.filepath}: {ex} {str(ex)}") - - # - # Augment kfp_ui_metadata_filename with Elyra-specific information: - # - link to object storage where input and output artifacts are - # stored - ui_metadata_output = output_path / kfp_ui_metadata_filename - try: - # re-load the file - with open(ui_metadata_output, "r") as f: - metadata = json.load(f) - except Exception: - # ignore all errors - metadata = {} - - # Assure the 'output' property exists and is of the correct type - if metadata.get("outputs", None) is None or not isinstance(metadata["outputs"], list): - metadata["outputs"] = [] - - # Define HREF for COS bucket: - # // - bucket_url = urljoin( - urlunparse(self.cos_endpoint), f"{self.cos_bucket}/{self.input_params.get('cos-directory', '')}/" - ) - - # add Elyra metadata to 'outputs' - metadata["outputs"].append( - { - "storage": "inline", - "source": f"## Inputs for {self.filepath}\n" - f"[{self.input_params['cos-dependencies-archive']}]({bucket_url})", - "type": "markdown", - } - ) - - # print the content of the augmented metadata file - logger.debug(f"Output UI metadata: {json.dumps(metadata)}") - - logger.debug(f"Saving UI metadata file as {ui_metadata_output} ...") - - # Save [updated] KFP UI metadata file - with open(ui_metadata_output, "w") as f: - json.dump(metadata, f) - - duration = time.time() - t0 - OpUtil.log_operation_info("metrics and metadata processed", duration) - - def get_object_storage_filename(self, filename: str) -> str: - """Function to pre-pend cloud storage working dir to file name - - :param filename: the local file - :return: the full path of the object storage file - """ - return os.path.join(self.input_params.get("cos-directory", ""), filename) - - def get_file_from_object_storage(self, file_to_get: str) -> None: - """Utility function to get files from an object storage - - :param file_to_get: filename - """ - - object_to_get = self.get_object_storage_filename(file_to_get) - t0 = time.time() - self.cos_client.fget_object(bucket_name=self.cos_bucket, object_name=object_to_get, file_path=file_to_get) - duration = time.time() - t0 - OpUtil.log_operation_info( - f"downloaded {file_to_get} from bucket: {self.cos_bucket}, object: {object_to_get}", duration - ) - - def put_file_to_object_storage(self, file_to_upload: str, object_name: Optional[str] = None) -> None: - """Utility function to put files into an object 
storage - - :param file_to_upload: filename - :param object_name: remote filename (used to rename) - """ - - object_to_upload = object_name - if not object_to_upload: - object_to_upload = file_to_upload - - object_to_upload = self.get_object_storage_filename(object_to_upload) - t0 = time.time() - self.cos_client.fput_object(bucket_name=self.cos_bucket, object_name=object_to_upload, file_path=file_to_upload) - duration = time.time() - t0 - OpUtil.log_operation_info( - f"uploaded {file_to_upload} to bucket: {self.cos_bucket} object: {object_to_upload}", duration - ) - - def has_wildcard(self, filename): - wildcards = ["*", "?"] - return bool(any(c in filename for c in wildcards)) - - def process_output_file(self, output_file): - """Puts the file to object storage. Handles wildcards and directories.""" - - matched_files = [output_file] - if self.has_wildcard(output_file): # explode the wildcarded file - matched_files = glob.glob(output_file) - - for matched_file in matched_files: - if os.path.isdir(matched_file): - for file in os.listdir(matched_file): - self.process_output_file(os.path.join(matched_file, file)) - else: - self.put_file_to_object_storage(matched_file) - - def convert_param_str_to_dict(self, pipeline_parameters: Optional[str] = None) -> Dict[str, Any]: - """Convert INOUT-separated string of pipeline parameters into a dictionary.""" - parameter_dict = {} - if pipeline_parameters: - parameter_list = pipeline_parameters.split(INOUT_SEPARATOR) - for parameter in parameter_list: - param_name, value = parameter.split("=", 1) - if self.parameter_pass_method == "env" and (not value or not isinstance(value, str)): - continue # env vars must be non-empty strings - parameter_dict[param_name] = value - return parameter_dict - - def set_parameters_in_env(self) -> None: - """Make pipeline parameters available as environment variables.""" - for name, value in self.pipeline_param_dict.items(): - if name in os.environ: - continue # avoid overwriting env vars with the same name - os.environ[name] = value - - -class NotebookFileOp(FileOpBase): - """Perform Notebook File Operation""" - - def execute(self) -> None: - """Execute the Notebook and upload results to object storage""" - notebook = os.path.basename(self.filepath) - notebook_name = notebook.replace(".ipynb", "") - notebook_output = f"{notebook_name}-output.ipynb" - notebook_html = f"{notebook_name}.html" - - try: - OpUtil.log_operation_info(f"executing notebook using 'papermill {notebook} {notebook_output}'") - t0 = time.time() - # Include kernel selection in execution time - kernel_name = NotebookFileOp.find_best_kernel(notebook) - - kwargs = {} - if self.parameter_pass_method == "env": - self.set_parameters_in_env() - - import papermill - - papermill.execute_notebook(notebook, notebook_output, kernel_name=kernel_name, **kwargs) - duration = time.time() - t0 - OpUtil.log_operation_info("notebook execution completed", duration) - - NotebookFileOp.convert_notebook_to_html(notebook_output, notebook_html) - self.put_file_to_object_storage(notebook_output, notebook) - self.put_file_to_object_storage(notebook_html) - self.process_outputs() - except Exception as ex: - # log in case of errors - logger.error(f"Unexpected error: {sys.exc_info()[0]}") - - NotebookFileOp.convert_notebook_to_html(notebook_output, notebook_html) - self.put_file_to_object_storage(notebook_output, notebook) - self.put_file_to_object_storage(notebook_html) - raise ex - - @staticmethod - def convert_notebook_to_html(notebook_file: str, html_file: str) -> str: - 
"""Function to convert a Jupyter notebook file (.ipynb) into an html file - - :param notebook_file: object storage client - :param html_file: name of what the html output file should be - :return: html_file: the converted notebook in html format - """ - import nbconvert - import nbformat - - OpUtil.log_operation_info(f"converting from {notebook_file} to {html_file}") - t0 = time.time() - nb = nbformat.read(notebook_file, as_version=4) - html_exporter = nbconvert.HTMLExporter() - data, resources = html_exporter.from_notebook_node(nb) - with open(html_file, "w") as f: - f.write(data) - f.close() - - duration = time.time() - t0 - OpUtil.log_operation_info(f"{notebook_file} converted to {html_file}", duration) - return html_file - - @staticmethod - def find_best_kernel(notebook_file: str) -> str: - """Determines the best kernel to use via the following algorithm: - - 1. Loads notebook and gets kernel_name and kernel_language from NB metadata. - 2. Gets the list of configured kernels using KernelSpecManager. - 3. If notebook kernel_name is in list, use that, else - 4. If not found, load each configured kernel.json file and find a language match. - 5. On first match, log info message regarding the switch and use that kernel. - 6. If no language match is found, revert to notebook kernel and log warning message. - """ - from jupyter_client.kernelspec import KernelSpecManager - import nbformat - - nb = nbformat.read(notebook_file, 4) - - nb_kspec = nb.metadata.kernelspec - nb_kernel_name = nb_kspec.get("name") - nb_kernel_lang = nb_kspec.get("language") - - kernel_specs = KernelSpecManager().find_kernel_specs() - - # see if we have a direct match... - if nb_kernel_name in kernel_specs.keys(): - return nb_kernel_name - - # no match found for kernel, try matching language... - for name, file in kernel_specs.items(): - # load file (JSON) and pick out language, if match, use first found - with open(os.path.join(file, "kernel.json")) as f: - kspec = json.load(f) - if kspec.get("language").lower() == nb_kernel_lang.lower(): - matched_kernel = os.path.basename(file) - logger.info( - f"Matched kernel by language ({nb_kernel_lang}), using kernel " - f"'{matched_kernel}' instead of the missing kernel '{nb_kernel_name}'." - ) - return matched_kernel - - # no match found for language, return notebook kernel and let execution fail - logger.warning( - f"Reverting back to missing notebook kernel '{nb_kernel_name}' since no " - f"language match ({nb_kernel_lang}) was found in current kernel specifications." 
- ) - return nb_kernel_name - - -class PythonFileOp(FileOpBase): - """Perform Python File Operation""" - - def execute(self) -> None: - """Execute the Python script and upload results to object storage""" - python_script = os.path.basename(self.filepath) - python_script_name = python_script.replace(".py", "") - # python_script_output = f"{python_script_name}.log" - - try: - OpUtil.log_operation_info( - f"executing python script using 'python3 {python_script}'" - ) - t0 = time.time() - - run_args = ["python3", python_script] - if self.parameter_pass_method == "env": - self.set_parameters_in_env() - - logger.info("----------------------Python logs start----------------------") - # Removing support for the s3 storage of python script logs - # with open(python_script_output, "w") as log_file: - # process = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - process = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - - for line in iter(process.stdout.readline, b''): - sys.stdout.write(line.decode()) - - process.stdout.close() - return_code = process.wait() - logger.info("----------------------Python logs ends----------------------") - if return_code: - raise subprocess.CalledProcessError(return_code, run_args) - duration = time.time() - t0 - OpUtil.log_operation_info("python script execution completed", duration) - - # self.put_file_to_object_storage(python_script_output, python_script_output) - self.process_outputs() - except Exception as ex: - # log in case of errors - logger.error(f"Unexpected error: {sys.exc_info()[0]}") - logger.error(f"Error details: {ex}") - - # self.put_file_to_object_storage(python_script_output, python_script_output) - raise ex - - -class RFileOp(FileOpBase): - """Perform R File Operation""" - - def execute(self) -> None: - """Execute the R script and upload results to object storage""" - r_script = os.path.basename(self.filepath) - r_script_name = r_script.replace(".r", "") - # r_script_output = f"{r_script_name}.log" - - try: - OpUtil.log_operation_info(f"executing R script using 'Rscript {r_script}'") - t0 = time.time() - - run_args = ["Rscript", r_script] - if self.parameter_pass_method == "env": - self.set_parameters_in_env() - - logger.info("----------------------R script logs start----------------------") - # Removing support for the s3 storage of R script logs - # with open(r_script_output, "w") as log_file: - # process = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - process = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - - for line in iter(process.stdout.readline, b''): - sys.stdout.write(line.decode()) - - process.stdout.close() - return_code = process.wait() - logger.info("----------------------R script logs ends----------------------") - if return_code: - raise subprocess.CalledProcessError(return_code, run_args) - - duration = time.time() - t0 - OpUtil.log_operation_info("R script execution completed", duration) - - # self.put_file_to_object_storage(r_script_output, r_script_output) - self.process_outputs() - except Exception as ex: - # log in case of errors - logger.error(f"Unexpected error: {sys.exc_info()[0]}") - logger.error(f"Error details: {ex}") - - # self.put_file_to_object_storage(r_script_output, r_script_output) - raise ex - - -class OpUtil(object): - """Utility functions for preparing file execution.""" - - @classmethod - def package_install(cls, user_volume_path) -> None: - OpUtil.log_operation_info("Installing packages") 
- t0 = time.time() - requirements_file = cls.determine_elyra_requirements() - elyra_packages = cls.package_list_to_dict(requirements_file) - current_packages = cls.package_list_to_dict("requirements-current.txt") - to_install_list = [] - - for package, ver in elyra_packages.items(): - if package in current_packages: - if current_packages[package] is None: - logger.warning( - f"WARNING: Source package '{package}' found already installed as an " - "editable package. This may conflict with the required version: " - f"{ver} . Skipping..." - ) - continue - try: - version.Version(current_packages[package]) - except version.InvalidVersion: # current version is not PEP-440 compliant - logger.warning( - f"WARNING: Source package '{package}' found already installed from " - f"{current_packages[package]}. This may conflict with the required " - f"version: {ver} . Skipping..." - ) - continue - if version.Version(ver) > version.Version(current_packages[package]): - logger.info(f"Updating {package} package from version {current_packages[package]} to {ver}...") - to_install_list.append(f"{package}=={ver}") - elif version.Version(ver) < version.Version(current_packages[package]): - logger.info( - f"Newer {package} package with version {current_packages[package]} " - f"already installed. Skipping..." - ) - else: - logger.info(f"Package not found. Installing {package} package with version {ver}...") - to_install_list.append(f"{package}=={ver}") - - if to_install_list: - if user_volume_path: - to_install_list.insert(0, f"--target={user_volume_path}") - to_install_list.append("--no-cache-dir") - - subprocess.run([sys.executable, "-m", "pip", "install"] + to_install_list, check=True) - - if user_volume_path: - os.environ["PIP_CONFIG_FILE"] = f"{user_volume_path}/pip.conf" - - subprocess.run([sys.executable, "-m", "pip", "freeze"]) - duration = time.time() - t0 - OpUtil.log_operation_info("Packages installed", duration) - - @classmethod - def determine_elyra_requirements(cls) -> Any: - if sys.version_info.major == 3: - if sys.version_info.minor in [8, 9, 10, 11]: - return "requirements-elyra.txt" - logger.error( - f"This version of Python '{sys.version_info.major}.{sys.version_info.minor}' " - f"is not supported for Elyra generic components" - ) - return None - - @classmethod - def package_list_to_dict(cls, filename: str) -> dict: - package_dict = {} - with open(filename) as fh: - for line in fh: - if line[0] != "#": - if " @ " in line: - package_name, package_version = line.strip("\n").split(sep=" @ ") - elif "===" in line: - package_name, package_version = line.strip("\n").split(sep="===") - elif "==" in line: - package_name, package_version = line.strip("\n").split(sep="==") - elif line.startswith("-e ") or line.startswith("--editable "): - package_name = line.strip("\n").replace("-e ", "").replace("--editable ", "") - if "#egg=" in package_name: # editable package from version control system - package_name = package_name.split("=")[-1] - elif "/" in package_name: # editable package from local directory - package_name = os.path.basename(package_name) - package_version = None - else: - # Tolerate other formats but do not add to package list - continue - - package_dict[package_name] = package_version - - return package_dict - - @classmethod - def parse_arguments(cls, args) -> dict: - import argparse - - global pipeline_name, operation_name - - logger.debug("Parsing Arguments.....") - parser = argparse.ArgumentParser() - parser.add_argument( - "-e", "--cos-endpoint", dest="cos-endpoint", help="Cloud object 
storage endpoint", required=True - ) - parser.add_argument( - "-b", "--cos-bucket", dest="cos-bucket", help="Cloud object storage bucket to use", required=True - ) - parser.add_argument( - "-d", - "--cos-directory", - dest="cos-directory", - help="Working directory in cloud object storage bucket to use", - required=True, - ) - parser.add_argument( - "-t", - "--cos-dependencies-archive", - dest="cos-dependencies-archive", - help="Archive containing notebook and dependency artifacts", - required=True, - ) - parser.add_argument("-f", "--file", dest="filepath", help="File to execute", required=True) - parser.add_argument("-o", "--outputs", dest="outputs", help="Files to output to object store", required=False) - parser.add_argument("-i", "--inputs", dest="inputs", help="Files to pull in from parent node", required=False) - parser.add_argument( - "-p", - "--user-volume-path", - dest="user-volume-path", - help="Directory in Volume to install python libraries into", - required=False, - ) - parser.add_argument( - "-n", - "--pipeline-name", - dest="pipeline-name", - help="Pipeline name", - required=True, - ) - parser.add_argument( - "-r", - "--pipeline-parameters", - dest="pipeline_parameters", - help="Pipeline parameters that apply to this node", - required=False, - ) - parser.add_argument( - "-m", - "--parameter-pass-method", - dest="parameter_pass_method", - choices=["env"], - help="The method by which pipeline parameters should be applied to this node.", - required=False, - ) - parsed_args = vars(parser.parse_args(args)) - - # set pipeline name as global - pipeline_name = parsed_args.get("pipeline-name") - # operation/node name is the basename of the non-suffixed filepath, set as global - operation_name = os.path.basename(os.path.splitext(parsed_args.get("filepath"))[0]) - - return parsed_args - - @classmethod - def log_operation_info(cls, action_clause: str, duration_secs: Optional[float] = None) -> None: - """Produces a formatted log INFO message used entirely for support purposes. - - This method is intended to be called for any entries that should be captured across aggregated - log files to identify steps within a given pipeline and each of its operations. As a result, - calls to this method should produce single-line entries in the log (no embedded newlines). - Each entry is prefixed with the pipeline name. - - General logging should NOT use this method but use logger.() statements directly. 
- - :param action_clause: str representing the action that is being logged - :param duration_secs: optional float value representing the duration of the action being logged - """ - global pipeline_name, operation_name - if enable_pipeline_info: - duration_clause = f"({duration_secs:.3f} secs)" if duration_secs else "" - logger.info(f"'{pipeline_name}':'{operation_name}' - {action_clause} {duration_clause}") - - -def main(): - # Configure logger format, level - logging.basicConfig( - format="[%(levelname)1.1s %(asctime)s.%(msecs).03d] %(message)s", datefmt="%H:%M:%S", level=logging.DEBUG - ) - # Setup packages and gather arguments - input_params = OpUtil.parse_arguments(sys.argv[1:]) - OpUtil.log_operation_info("starting operation") - t0 = time.time() - - # Create the appropriate instance, process dependencies and execute the operation - file_op = FileOpBase.get_instance(**input_params) - - file_op.process_dependencies() - - file_op.execute() - - # Process notebook | script metrics and KFP UI metadata - file_op.process_metrics_and_metadata() - - duration = time.time() - t0 - OpUtil.log_operation_info("operation completed", duration) - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/runtimes/rocm-tensorflow/ubi9-python-3.12/Dockerfile.rocm b/runtimes/rocm-tensorflow/ubi9-python-3.12/Dockerfile.rocm index b8a464df23..587add0b5d 100644 --- a/runtimes/rocm-tensorflow/ubi9-python-3.12/Dockerfile.rocm +++ b/runtimes/rocm-tensorflow/ubi9-python-3.12/Dockerfile.rocm @@ -67,6 +67,9 @@ WORKDIR /opt/app-root/bin COPY ${TENSORFLOW_SOURCE_CODE}/pylock.toml ./ # Copy Elyra dependencies for air-gapped enviroment COPY ${TENSORFLOW_SOURCE_CODE}/utils ./utils/ +# Download Elyra boostrapper.py +RUN curl -fL https://raw.githubusercontent.com/opendatahub-io/elyra/refs/heads/main/elyra/kfp/bootstrapper.py \ + -o ./utils/bootstrapper.py RUN echo "Installing softwares and packages" && \ # This may have to download and compile some dependencies, and as we don't lock requirements from `build-system.requires`, diff --git a/runtimes/rocm-tensorflow/ubi9-python-3.12/utils/bootstrapper.py b/runtimes/rocm-tensorflow/ubi9-python-3.12/utils/bootstrapper.py deleted file mode 100644 index 8009048682..0000000000 --- a/runtimes/rocm-tensorflow/ubi9-python-3.12/utils/bootstrapper.py +++ /dev/null @@ -1,769 +0,0 @@ -# Copied from: https://github.com/elyra-ai/elyra/blob/main/elyra/kfp/bootstrapper.py -# -# Copyright 2018-2023 Elyra Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -from abc import ABC -from abc import abstractmethod -import glob -import json -import logging -import os -from pathlib import Path -import subprocess -import sys -from tempfile import TemporaryFile -import time -from typing import Any -from typing import Dict -from typing import Optional -from typing import Type -from typing import TypeVar -from urllib.parse import urljoin -from urllib.parse import urlparse -from urllib.parse import urlunparse - -from packaging import version - - -# Inputs and Outputs separator character. If updated, -# same-named variable in _notebook_op.py must be updated! -INOUT_SEPARATOR = ";" - -# Setup forward reference for type hint on return from class factory method. See -# https://stackoverflow.com/questions/39205527/can-you-annotate-return-type-when-value-is-instance-of-cls/39205612#39205612 -F = TypeVar("F", bound="FileOpBase") - -logger = logging.getLogger("elyra") -enable_pipeline_info = os.getenv("ELYRA_ENABLE_PIPELINE_INFO", "true").lower() == "true" -pipeline_name = None # global used in formatted logging -operation_name = None # global used in formatted logging - - -class FileOpBase(ABC): - """Abstract base class for file-based operations""" - - filepath = None - cos_client = None - cos_bucket = None - - @classmethod - def get_instance(cls: Type[F], **kwargs: Any) -> F: - """Creates an appropriate subclass instance based on the extension of the filepath (-f) argument""" - filepath = kwargs["filepath"] - if ".ipynb" in filepath: - return NotebookFileOp(**kwargs) - elif ".py" in filepath: - return PythonFileOp(**kwargs) - elif ".r" in filepath: - return RFileOp(**kwargs) - else: - raise ValueError(f"Unsupported file type: {filepath}") - - def __init__(self, **kwargs: Any) -> None: - """Initializes the FileOpBase instance""" - import minio - from minio.credentials import providers - - self.filepath = kwargs["filepath"] - self.input_params = kwargs or {} - self.cos_endpoint = urlparse(self.input_params.get("cos-endpoint")) - self.cos_bucket = self.input_params.get("cos-bucket") - - self.parameter_pass_method = self.input_params.get("parameter_pass_method") - self.pipeline_param_dict = self.convert_param_str_to_dict(self.input_params.get("pipeline_parameters")) - - # Infer secure from the endpoint's scheme. - self.secure = self.cos_endpoint.scheme == "https" - - # get minio credentials provider - if "cos-user" in self.input_params and "cos-password" in self.input_params: - cred_provider = providers.StaticProvider( - access_key=self.input_params.get("cos-user"), - secret_key=self.input_params.get("cos-password"), - ) - elif "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ: - cred_provider = providers.EnvAWSProvider() - elif "AWS_ROLE_ARN" in os.environ and "AWS_WEB_IDENTITY_TOKEN_FILE" in os.environ: - cred_provider = providers.IamAwsProvider() - else: - raise RuntimeError( - "No minio credentials provider can be initialised for current configs. " - "Please validate your runtime configuration details and retry." - ) - - # get minio client - self.cos_client = minio.Minio(self.cos_endpoint.netloc, secure=self.secure, credentials=cred_provider) - - @abstractmethod - def execute(self) -> None: - """Execute the operation relative to derived class""" - raise NotImplementedError("Method 'execute()' must be implemented by subclasses!") - - def process_dependencies(self) -> None: - """Process dependencies - - If a dependency archive is present, it will be downloaded from object storage - and expanded into the local directory. 
- - This method can be overridden by subclasses, although overrides should first - call the superclass method. - """ - OpUtil.log_operation_info("processing dependencies") - t0 = time.time() - archive_file = self.input_params.get("cos-dependencies-archive") - - self.get_file_from_object_storage(archive_file) - - inputs = self.input_params.get("inputs") - if inputs: - input_list = inputs.split(INOUT_SEPARATOR) - for file in input_list: - self.get_file_from_object_storage(file.strip()) - - subprocess.call(["tar", "-zxvf", archive_file]) - duration = time.time() - t0 - OpUtil.log_operation_info("dependencies processed", duration) - - def process_outputs(self) -> None: - """Process outputs - - If outputs have been specified, it will upload the appropriate files to object storage - - This method can be overridden by subclasses, although overrides should first - call the superclass method. - """ - OpUtil.log_operation_info("processing outputs") - t0 = time.time() - outputs = self.input_params.get("outputs") - if outputs: - output_list = outputs.split(INOUT_SEPARATOR) - for file in output_list: - self.process_output_file(file.strip()) - duration = time.time() - t0 - OpUtil.log_operation_info("outputs processed", duration) - - def process_metrics_and_metadata(self) -> None: - """Process metrics and metadata - - This method exposes metrics/metadata that the processed - notebook | script produces in the KFP UI. - - This method should not be overridden by subclasses. - """ - - OpUtil.log_operation_info("processing metrics and metadata") - t0 = time.time() - - # Location where the KFP specific output files will be stored - # in the environment where the bootsrapper is running. - # Defaults to '/tmp' if not specified. - output_path = Path(os.getenv("ELYRA_WRITABLE_CONTAINER_DIR", "/tmp")) - - # verify that output_path exists, is a directory - # and writable by creating a temporary file in that location - try: - with TemporaryFile(mode="w", dir=output_path) as t: - t.write("can write") - except Exception: - # output_path doesn't meet the requirements - # treat this as a non-fatal error and log a warning - logger.warning(f'Cannot create files in "{output_path}".') - OpUtil.log_operation_info("Aborted metrics and metadata processing", time.time() - t0) - return - - # Name of the proprietary KFP UI metadata file. - # Notebooks | scripts might (but don't have to) produce this file - # as documented in - # https://www.kubeflow.org/docs/pipelines/sdk/output-viewer/ - # Each ExecuteFileOp must declare this as an output file or - # the KFP UI won't pick up the information. - kfp_ui_metadata_filename = "mlpipeline-ui-metadata.json" - - # Name of the proprietary KFP metadata file. - # Notebooks | scripts might (but don't have to) produce this file - # as documented in - # https://www.kubeflow.org/docs/pipelines/sdk/pipelines-metrics/ - # Each ExecuteFileOp must declare this as an output file or - # the KFP UI won't pick up the information. - kfp_metrics_filename = "mlpipeline-metrics.json" - - # If the notebook | Python script produced one of the files - # copy it to the target location where KFP is looking for it. 
- for filename in [kfp_ui_metadata_filename, kfp_metrics_filename]: - try: - src = Path(".") / filename - logger.debug(f"Processing {src} ...") - # try to load the file, if one was created by the - # notebook or script - with open(src, "r") as f: - metadata = json.load(f) - - # the file exists and contains valid JSON - logger.debug(f"File content: {json.dumps(metadata)}") - - target = output_path / filename - # try to save the file in the destination location - with open(target, "w") as f: - json.dump(metadata, f) - except FileNotFoundError: - # The script | notebook didn't produce the file - # we are looking for. This is not an error condition - # that needs to be handled. - logger.debug(f"{self.filepath} produced no file named {src}") - except ValueError as ve: - # The file content could not be parsed. Log a warning - # and treat this as a non-fatal error. - logger.warning(f"Ignoring incompatible {str(src)} produced by {self.filepath}: {ve} {str(ve)}") - except Exception as ex: - # Something is wrong with the user-generated metadata file. - # Log a warning and treat this as a non-fatal error. - logger.warning(f"Error processing {str(src)} produced by {self.filepath}: {ex} {str(ex)}") - - # - # Augment kfp_ui_metadata_filename with Elyra-specific information: - # - link to object storage where input and output artifacts are - # stored - ui_metadata_output = output_path / kfp_ui_metadata_filename - try: - # re-load the file - with open(ui_metadata_output, "r") as f: - metadata = json.load(f) - except Exception: - # ignore all errors - metadata = {} - - # Assure the 'output' property exists and is of the correct type - if metadata.get("outputs", None) is None or not isinstance(metadata["outputs"], list): - metadata["outputs"] = [] - - # Define HREF for COS bucket: - # // - bucket_url = urljoin( - urlunparse(self.cos_endpoint), f"{self.cos_bucket}/{self.input_params.get('cos-directory', '')}/" - ) - - # add Elyra metadata to 'outputs' - metadata["outputs"].append( - { - "storage": "inline", - "source": f"## Inputs for {self.filepath}\n" - f"[{self.input_params['cos-dependencies-archive']}]({bucket_url})", - "type": "markdown", - } - ) - - # print the content of the augmented metadata file - logger.debug(f"Output UI metadata: {json.dumps(metadata)}") - - logger.debug(f"Saving UI metadata file as {ui_metadata_output} ...") - - # Save [updated] KFP UI metadata file - with open(ui_metadata_output, "w") as f: - json.dump(metadata, f) - - duration = time.time() - t0 - OpUtil.log_operation_info("metrics and metadata processed", duration) - - def get_object_storage_filename(self, filename: str) -> str: - """Function to pre-pend cloud storage working dir to file name - - :param filename: the local file - :return: the full path of the object storage file - """ - return os.path.join(self.input_params.get("cos-directory", ""), filename) - - def get_file_from_object_storage(self, file_to_get: str) -> None: - """Utility function to get files from an object storage - - :param file_to_get: filename - """ - - object_to_get = self.get_object_storage_filename(file_to_get) - t0 = time.time() - self.cos_client.fget_object(bucket_name=self.cos_bucket, object_name=object_to_get, file_path=file_to_get) - duration = time.time() - t0 - OpUtil.log_operation_info( - f"downloaded {file_to_get} from bucket: {self.cos_bucket}, object: {object_to_get}", duration - ) - - def put_file_to_object_storage(self, file_to_upload: str, object_name: Optional[str] = None) -> None: - """Utility function to put files into an object 
storage - - :param file_to_upload: filename - :param object_name: remote filename (used to rename) - """ - - object_to_upload = object_name - if not object_to_upload: - object_to_upload = file_to_upload - - object_to_upload = self.get_object_storage_filename(object_to_upload) - t0 = time.time() - self.cos_client.fput_object(bucket_name=self.cos_bucket, object_name=object_to_upload, file_path=file_to_upload) - duration = time.time() - t0 - OpUtil.log_operation_info( - f"uploaded {file_to_upload} to bucket: {self.cos_bucket} object: {object_to_upload}", duration - ) - - def has_wildcard(self, filename): - wildcards = ["*", "?"] - return bool(any(c in filename for c in wildcards)) - - def process_output_file(self, output_file): - """Puts the file to object storage. Handles wildcards and directories.""" - - matched_files = [output_file] - if self.has_wildcard(output_file): # explode the wildcarded file - matched_files = glob.glob(output_file) - - for matched_file in matched_files: - if os.path.isdir(matched_file): - for file in os.listdir(matched_file): - self.process_output_file(os.path.join(matched_file, file)) - else: - self.put_file_to_object_storage(matched_file) - - def convert_param_str_to_dict(self, pipeline_parameters: Optional[str] = None) -> Dict[str, Any]: - """Convert INOUT-separated string of pipeline parameters into a dictionary.""" - parameter_dict = {} - if pipeline_parameters: - parameter_list = pipeline_parameters.split(INOUT_SEPARATOR) - for parameter in parameter_list: - param_name, value = parameter.split("=", 1) - if self.parameter_pass_method == "env" and (not value or not isinstance(value, str)): - continue # env vars must be non-empty strings - parameter_dict[param_name] = value - return parameter_dict - - def set_parameters_in_env(self) -> None: - """Make pipeline parameters available as environment variables.""" - for name, value in self.pipeline_param_dict.items(): - if name in os.environ: - continue # avoid overwriting env vars with the same name - os.environ[name] = value - - -class NotebookFileOp(FileOpBase): - """Perform Notebook File Operation""" - - def execute(self) -> None: - """Execute the Notebook and upload results to object storage""" - notebook = os.path.basename(self.filepath) - notebook_name = notebook.replace(".ipynb", "") - notebook_output = f"{notebook_name}-output.ipynb" - notebook_html = f"{notebook_name}.html" - - try: - OpUtil.log_operation_info(f"executing notebook using 'papermill {notebook} {notebook_output}'") - t0 = time.time() - # Include kernel selection in execution time - kernel_name = NotebookFileOp.find_best_kernel(notebook) - - kwargs = {} - if self.parameter_pass_method == "env": - self.set_parameters_in_env() - - import papermill - - papermill.execute_notebook(notebook, notebook_output, kernel_name=kernel_name, **kwargs) - duration = time.time() - t0 - OpUtil.log_operation_info("notebook execution completed", duration) - - NotebookFileOp.convert_notebook_to_html(notebook_output, notebook_html) - self.put_file_to_object_storage(notebook_output, notebook) - self.put_file_to_object_storage(notebook_html) - self.process_outputs() - except Exception as ex: - # log in case of errors - logger.error(f"Unexpected error: {sys.exc_info()[0]}") - - NotebookFileOp.convert_notebook_to_html(notebook_output, notebook_html) - self.put_file_to_object_storage(notebook_output, notebook) - self.put_file_to_object_storage(notebook_html) - raise ex - - @staticmethod - def convert_notebook_to_html(notebook_file: str, html_file: str) -> str: - 
"""Function to convert a Jupyter notebook file (.ipynb) into an html file - - :param notebook_file: object storage client - :param html_file: name of what the html output file should be - :return: html_file: the converted notebook in html format - """ - import nbconvert - import nbformat - - OpUtil.log_operation_info(f"converting from {notebook_file} to {html_file}") - t0 = time.time() - nb = nbformat.read(notebook_file, as_version=4) - html_exporter = nbconvert.HTMLExporter() - data, resources = html_exporter.from_notebook_node(nb) - with open(html_file, "w") as f: - f.write(data) - f.close() - - duration = time.time() - t0 - OpUtil.log_operation_info(f"{notebook_file} converted to {html_file}", duration) - return html_file - - @staticmethod - def find_best_kernel(notebook_file: str) -> str: - """Determines the best kernel to use via the following algorithm: - - 1. Loads notebook and gets kernel_name and kernel_language from NB metadata. - 2. Gets the list of configured kernels using KernelSpecManager. - 3. If notebook kernel_name is in list, use that, else - 4. If not found, load each configured kernel.json file and find a language match. - 5. On first match, log info message regarding the switch and use that kernel. - 6. If no language match is found, revert to notebook kernel and log warning message. - """ - from jupyter_client.kernelspec import KernelSpecManager - import nbformat - - nb = nbformat.read(notebook_file, 4) - - nb_kspec = nb.metadata.kernelspec - nb_kernel_name = nb_kspec.get("name") - nb_kernel_lang = nb_kspec.get("language") - - kernel_specs = KernelSpecManager().find_kernel_specs() - - # see if we have a direct match... - if nb_kernel_name in kernel_specs.keys(): - return nb_kernel_name - - # no match found for kernel, try matching language... - for name, file in kernel_specs.items(): - # load file (JSON) and pick out language, if match, use first found - with open(os.path.join(file, "kernel.json")) as f: - kspec = json.load(f) - if kspec.get("language").lower() == nb_kernel_lang.lower(): - matched_kernel = os.path.basename(file) - logger.info( - f"Matched kernel by language ({nb_kernel_lang}), using kernel " - f"'{matched_kernel}' instead of the missing kernel '{nb_kernel_name}'." - ) - return matched_kernel - - # no match found for language, return notebook kernel and let execution fail - logger.warning( - f"Reverting back to missing notebook kernel '{nb_kernel_name}' since no " - f"language match ({nb_kernel_lang}) was found in current kernel specifications." 
- ) - return nb_kernel_name - - -class PythonFileOp(FileOpBase): - """Perform Python File Operation""" - - def execute(self) -> None: - """Execute the Python script and upload results to object storage""" - python_script = os.path.basename(self.filepath) - python_script_name = python_script.replace(".py", "") - # python_script_output = f"{python_script_name}.log" - - try: - OpUtil.log_operation_info( - f"executing python script using 'python3 {python_script}'" - ) - t0 = time.time() - - run_args = ["python3", python_script] - if self.parameter_pass_method == "env": - self.set_parameters_in_env() - - logger.info("----------------------Python logs start----------------------") - # Removing support for the s3 storage of python script logs - # with open(python_script_output, "w") as log_file: - # process = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - process = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - - for line in iter(process.stdout.readline, b''): - sys.stdout.write(line.decode()) - - process.stdout.close() - return_code = process.wait() - logger.info("----------------------Python logs ends----------------------") - if return_code: - raise subprocess.CalledProcessError(return_code, run_args) - duration = time.time() - t0 - OpUtil.log_operation_info("python script execution completed", duration) - - # self.put_file_to_object_storage(python_script_output, python_script_output) - self.process_outputs() - except Exception as ex: - # log in case of errors - logger.error(f"Unexpected error: {sys.exc_info()[0]}") - logger.error(f"Error details: {ex}") - - # self.put_file_to_object_storage(python_script_output, python_script_output) - raise ex - - -class RFileOp(FileOpBase): - """Perform R File Operation""" - - def execute(self) -> None: - """Execute the R script and upload results to object storage""" - r_script = os.path.basename(self.filepath) - r_script_name = r_script.replace(".r", "") - # r_script_output = f"{r_script_name}.log" - - try: - OpUtil.log_operation_info(f"executing R script using 'Rscript {r_script}'") - t0 = time.time() - - run_args = ["Rscript", r_script] - if self.parameter_pass_method == "env": - self.set_parameters_in_env() - - logger.info("----------------------R script logs start----------------------") - # Removing support for the s3 storage of R script logs - # with open(r_script_output, "w") as log_file: - # process = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - process = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - - for line in iter(process.stdout.readline, b''): - sys.stdout.write(line.decode()) - - process.stdout.close() - return_code = process.wait() - logger.info("----------------------R script logs ends----------------------") - if return_code: - raise subprocess.CalledProcessError(return_code, run_args) - - duration = time.time() - t0 - OpUtil.log_operation_info("R script execution completed", duration) - - # self.put_file_to_object_storage(r_script_output, r_script_output) - self.process_outputs() - except Exception as ex: - # log in case of errors - logger.error(f"Unexpected error: {sys.exc_info()[0]}") - logger.error(f"Error details: {ex}") - - # self.put_file_to_object_storage(r_script_output, r_script_output) - raise ex - - -class OpUtil(object): - """Utility functions for preparing file execution.""" - - @classmethod - def package_install(cls, user_volume_path) -> None: - OpUtil.log_operation_info("Installing packages") 
- t0 = time.time() - requirements_file = cls.determine_elyra_requirements() - elyra_packages = cls.package_list_to_dict(requirements_file) - current_packages = cls.package_list_to_dict("requirements-current.txt") - to_install_list = [] - - for package, ver in elyra_packages.items(): - if package in current_packages: - if current_packages[package] is None: - logger.warning( - f"WARNING: Source package '{package}' found already installed as an " - "editable package. This may conflict with the required version: " - f"{ver} . Skipping..." - ) - continue - try: - version.Version(current_packages[package]) - except version.InvalidVersion: # current version is not PEP-440 compliant - logger.warning( - f"WARNING: Source package '{package}' found already installed from " - f"{current_packages[package]}. This may conflict with the required " - f"version: {ver} . Skipping..." - ) - continue - if version.Version(ver) > version.Version(current_packages[package]): - logger.info(f"Updating {package} package from version {current_packages[package]} to {ver}...") - to_install_list.append(f"{package}=={ver}") - elif version.Version(ver) < version.Version(current_packages[package]): - logger.info( - f"Newer {package} package with version {current_packages[package]} " - f"already installed. Skipping..." - ) - else: - logger.info(f"Package not found. Installing {package} package with version {ver}...") - to_install_list.append(f"{package}=={ver}") - - if to_install_list: - if user_volume_path: - to_install_list.insert(0, f"--target={user_volume_path}") - to_install_list.append("--no-cache-dir") - - subprocess.run([sys.executable, "-m", "pip", "install"] + to_install_list, check=True) - - if user_volume_path: - os.environ["PIP_CONFIG_FILE"] = f"{user_volume_path}/pip.conf" - - subprocess.run([sys.executable, "-m", "pip", "freeze"]) - duration = time.time() - t0 - OpUtil.log_operation_info("Packages installed", duration) - - @classmethod - def determine_elyra_requirements(cls) -> Any: - if sys.version_info.major == 3: - if sys.version_info.minor in [8, 9, 10, 11]: - return "requirements-elyra.txt" - logger.error( - f"This version of Python '{sys.version_info.major}.{sys.version_info.minor}' " - f"is not supported for Elyra generic components" - ) - return None - - @classmethod - def package_list_to_dict(cls, filename: str) -> dict: - package_dict = {} - with open(filename) as fh: - for line in fh: - if line[0] != "#": - if " @ " in line: - package_name, package_version = line.strip("\n").split(sep=" @ ") - elif "===" in line: - package_name, package_version = line.strip("\n").split(sep="===") - elif "==" in line: - package_name, package_version = line.strip("\n").split(sep="==") - elif line.startswith("-e ") or line.startswith("--editable "): - package_name = line.strip("\n").replace("-e ", "").replace("--editable ", "") - if "#egg=" in package_name: # editable package from version control system - package_name = package_name.split("=")[-1] - elif "/" in package_name: # editable package from local directory - package_name = os.path.basename(package_name) - package_version = None - else: - # Tolerate other formats but do not add to package list - continue - - package_dict[package_name] = package_version - - return package_dict - - @classmethod - def parse_arguments(cls, args) -> dict: - import argparse - - global pipeline_name, operation_name - - logger.debug("Parsing Arguments.....") - parser = argparse.ArgumentParser() - parser.add_argument( - "-e", "--cos-endpoint", dest="cos-endpoint", help="Cloud object 
storage endpoint", required=True - ) - parser.add_argument( - "-b", "--cos-bucket", dest="cos-bucket", help="Cloud object storage bucket to use", required=True - ) - parser.add_argument( - "-d", - "--cos-directory", - dest="cos-directory", - help="Working directory in cloud object storage bucket to use", - required=True, - ) - parser.add_argument( - "-t", - "--cos-dependencies-archive", - dest="cos-dependencies-archive", - help="Archive containing notebook and dependency artifacts", - required=True, - ) - parser.add_argument("-f", "--file", dest="filepath", help="File to execute", required=True) - parser.add_argument("-o", "--outputs", dest="outputs", help="Files to output to object store", required=False) - parser.add_argument("-i", "--inputs", dest="inputs", help="Files to pull in from parent node", required=False) - parser.add_argument( - "-p", - "--user-volume-path", - dest="user-volume-path", - help="Directory in Volume to install python libraries into", - required=False, - ) - parser.add_argument( - "-n", - "--pipeline-name", - dest="pipeline-name", - help="Pipeline name", - required=True, - ) - parser.add_argument( - "-r", - "--pipeline-parameters", - dest="pipeline_parameters", - help="Pipeline parameters that apply to this node", - required=False, - ) - parser.add_argument( - "-m", - "--parameter-pass-method", - dest="parameter_pass_method", - choices=["env"], - help="The method by which pipeline parameters should be applied to this node.", - required=False, - ) - parsed_args = vars(parser.parse_args(args)) - - # set pipeline name as global - pipeline_name = parsed_args.get("pipeline-name") - # operation/node name is the basename of the non-suffixed filepath, set as global - operation_name = os.path.basename(os.path.splitext(parsed_args.get("filepath"))[0]) - - return parsed_args - - @classmethod - def log_operation_info(cls, action_clause: str, duration_secs: Optional[float] = None) -> None: - """Produces a formatted log INFO message used entirely for support purposes. - - This method is intended to be called for any entries that should be captured across aggregated - log files to identify steps within a given pipeline and each of its operations. As a result, - calls to this method should produce single-line entries in the log (no embedded newlines). - Each entry is prefixed with the pipeline name. - - General logging should NOT use this method but use logger.() statements directly. 
- - :param action_clause: str representing the action that is being logged - :param duration_secs: optional float value representing the duration of the action being logged - """ - global pipeline_name, operation_name - if enable_pipeline_info: - duration_clause = f"({duration_secs:.3f} secs)" if duration_secs else "" - logger.info(f"'{pipeline_name}':'{operation_name}' - {action_clause} {duration_clause}") - - -def main(): - # Configure logger format, level - logging.basicConfig( - format="[%(levelname)1.1s %(asctime)s.%(msecs).03d] %(message)s", datefmt="%H:%M:%S", level=logging.DEBUG - ) - # Setup packages and gather arguments - input_params = OpUtil.parse_arguments(sys.argv[1:]) - OpUtil.log_operation_info("starting operation") - t0 = time.time() - - # Create the appropriate instance, process dependencies and execute the operation - file_op = FileOpBase.get_instance(**input_params) - - file_op.process_dependencies() - - file_op.execute() - - # Process notebook | script metrics and KFP UI metadata - file_op.process_metrics_and_metadata() - - duration = time.time() - t0 - OpUtil.log_operation_info("operation completed", duration) - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/runtimes/tensorflow/ubi9-python-3.12/Dockerfile.cuda b/runtimes/tensorflow/ubi9-python-3.12/Dockerfile.cuda index c6663ef1b0..31044dd6a9 100644 --- a/runtimes/tensorflow/ubi9-python-3.12/Dockerfile.cuda +++ b/runtimes/tensorflow/ubi9-python-3.12/Dockerfile.cuda @@ -71,6 +71,9 @@ WORKDIR /opt/app-root/bin COPY ${TENSORFLOW_SOURCE_CODE}/pylock.toml ./ # Copy Elyra dependencies for air-gapped enviroment COPY ${TENSORFLOW_SOURCE_CODE}/utils ./utils/ +# Download Elyra boostrapper.py +RUN curl -fL https://raw.githubusercontent.com/opendatahub-io/elyra/refs/heads/main/elyra/kfp/bootstrapper.py \ + -o ./utils/bootstrapper.py RUN echo "Installing softwares and packages" && \ # This may have to download and compile some dependencies, and as we don't lock requirements from `build-system.requires`, diff --git a/runtimes/tensorflow/ubi9-python-3.12/utils/bootstrapper.py b/runtimes/tensorflow/ubi9-python-3.12/utils/bootstrapper.py deleted file mode 100644 index 8009048682..0000000000 --- a/runtimes/tensorflow/ubi9-python-3.12/utils/bootstrapper.py +++ /dev/null @@ -1,769 +0,0 @@ -# Copied from: https://github.com/elyra-ai/elyra/blob/main/elyra/kfp/bootstrapper.py -# -# Copyright 2018-2023 Elyra Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from abc import ABC -from abc import abstractmethod -import glob -import json -import logging -import os -from pathlib import Path -import subprocess -import sys -from tempfile import TemporaryFile -import time -from typing import Any -from typing import Dict -from typing import Optional -from typing import Type -from typing import TypeVar -from urllib.parse import urljoin -from urllib.parse import urlparse -from urllib.parse import urlunparse - -from packaging import version - - -# Inputs and Outputs separator character. 
If updated, -# same-named variable in _notebook_op.py must be updated! -INOUT_SEPARATOR = ";" - -# Setup forward reference for type hint on return from class factory method. See -# https://stackoverflow.com/questions/39205527/can-you-annotate-return-type-when-value-is-instance-of-cls/39205612#39205612 -F = TypeVar("F", bound="FileOpBase") - -logger = logging.getLogger("elyra") -enable_pipeline_info = os.getenv("ELYRA_ENABLE_PIPELINE_INFO", "true").lower() == "true" -pipeline_name = None # global used in formatted logging -operation_name = None # global used in formatted logging - - -class FileOpBase(ABC): - """Abstract base class for file-based operations""" - - filepath = None - cos_client = None - cos_bucket = None - - @classmethod - def get_instance(cls: Type[F], **kwargs: Any) -> F: - """Creates an appropriate subclass instance based on the extension of the filepath (-f) argument""" - filepath = kwargs["filepath"] - if ".ipynb" in filepath: - return NotebookFileOp(**kwargs) - elif ".py" in filepath: - return PythonFileOp(**kwargs) - elif ".r" in filepath: - return RFileOp(**kwargs) - else: - raise ValueError(f"Unsupported file type: {filepath}") - - def __init__(self, **kwargs: Any) -> None: - """Initializes the FileOpBase instance""" - import minio - from minio.credentials import providers - - self.filepath = kwargs["filepath"] - self.input_params = kwargs or {} - self.cos_endpoint = urlparse(self.input_params.get("cos-endpoint")) - self.cos_bucket = self.input_params.get("cos-bucket") - - self.parameter_pass_method = self.input_params.get("parameter_pass_method") - self.pipeline_param_dict = self.convert_param_str_to_dict(self.input_params.get("pipeline_parameters")) - - # Infer secure from the endpoint's scheme. - self.secure = self.cos_endpoint.scheme == "https" - - # get minio credentials provider - if "cos-user" in self.input_params and "cos-password" in self.input_params: - cred_provider = providers.StaticProvider( - access_key=self.input_params.get("cos-user"), - secret_key=self.input_params.get("cos-password"), - ) - elif "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ: - cred_provider = providers.EnvAWSProvider() - elif "AWS_ROLE_ARN" in os.environ and "AWS_WEB_IDENTITY_TOKEN_FILE" in os.environ: - cred_provider = providers.IamAwsProvider() - else: - raise RuntimeError( - "No minio credentials provider can be initialised for current configs. " - "Please validate your runtime configuration details and retry." - ) - - # get minio client - self.cos_client = minio.Minio(self.cos_endpoint.netloc, secure=self.secure, credentials=cred_provider) - - @abstractmethod - def execute(self) -> None: - """Execute the operation relative to derived class""" - raise NotImplementedError("Method 'execute()' must be implemented by subclasses!") - - def process_dependencies(self) -> None: - """Process dependencies - - If a dependency archive is present, it will be downloaded from object storage - and expanded into the local directory. - - This method can be overridden by subclasses, although overrides should first - call the superclass method. 
- """ - OpUtil.log_operation_info("processing dependencies") - t0 = time.time() - archive_file = self.input_params.get("cos-dependencies-archive") - - self.get_file_from_object_storage(archive_file) - - inputs = self.input_params.get("inputs") - if inputs: - input_list = inputs.split(INOUT_SEPARATOR) - for file in input_list: - self.get_file_from_object_storage(file.strip()) - - subprocess.call(["tar", "-zxvf", archive_file]) - duration = time.time() - t0 - OpUtil.log_operation_info("dependencies processed", duration) - - def process_outputs(self) -> None: - """Process outputs - - If outputs have been specified, it will upload the appropriate files to object storage - - This method can be overridden by subclasses, although overrides should first - call the superclass method. - """ - OpUtil.log_operation_info("processing outputs") - t0 = time.time() - outputs = self.input_params.get("outputs") - if outputs: - output_list = outputs.split(INOUT_SEPARATOR) - for file in output_list: - self.process_output_file(file.strip()) - duration = time.time() - t0 - OpUtil.log_operation_info("outputs processed", duration) - - def process_metrics_and_metadata(self) -> None: - """Process metrics and metadata - - This method exposes metrics/metadata that the processed - notebook | script produces in the KFP UI. - - This method should not be overridden by subclasses. - """ - - OpUtil.log_operation_info("processing metrics and metadata") - t0 = time.time() - - # Location where the KFP specific output files will be stored - # in the environment where the bootsrapper is running. - # Defaults to '/tmp' if not specified. - output_path = Path(os.getenv("ELYRA_WRITABLE_CONTAINER_DIR", "/tmp")) - - # verify that output_path exists, is a directory - # and writable by creating a temporary file in that location - try: - with TemporaryFile(mode="w", dir=output_path) as t: - t.write("can write") - except Exception: - # output_path doesn't meet the requirements - # treat this as a non-fatal error and log a warning - logger.warning(f'Cannot create files in "{output_path}".') - OpUtil.log_operation_info("Aborted metrics and metadata processing", time.time() - t0) - return - - # Name of the proprietary KFP UI metadata file. - # Notebooks | scripts might (but don't have to) produce this file - # as documented in - # https://www.kubeflow.org/docs/pipelines/sdk/output-viewer/ - # Each ExecuteFileOp must declare this as an output file or - # the KFP UI won't pick up the information. - kfp_ui_metadata_filename = "mlpipeline-ui-metadata.json" - - # Name of the proprietary KFP metadata file. - # Notebooks | scripts might (but don't have to) produce this file - # as documented in - # https://www.kubeflow.org/docs/pipelines/sdk/pipelines-metrics/ - # Each ExecuteFileOp must declare this as an output file or - # the KFP UI won't pick up the information. - kfp_metrics_filename = "mlpipeline-metrics.json" - - # If the notebook | Python script produced one of the files - # copy it to the target location where KFP is looking for it. 
- for filename in [kfp_ui_metadata_filename, kfp_metrics_filename]: - try: - src = Path(".") / filename - logger.debug(f"Processing {src} ...") - # try to load the file, if one was created by the - # notebook or script - with open(src, "r") as f: - metadata = json.load(f) - - # the file exists and contains valid JSON - logger.debug(f"File content: {json.dumps(metadata)}") - - target = output_path / filename - # try to save the file in the destination location - with open(target, "w") as f: - json.dump(metadata, f) - except FileNotFoundError: - # The script | notebook didn't produce the file - # we are looking for. This is not an error condition - # that needs to be handled. - logger.debug(f"{self.filepath} produced no file named {src}") - except ValueError as ve: - # The file content could not be parsed. Log a warning - # and treat this as a non-fatal error. - logger.warning(f"Ignoring incompatible {str(src)} produced by {self.filepath}: {ve} {str(ve)}") - except Exception as ex: - # Something is wrong with the user-generated metadata file. - # Log a warning and treat this as a non-fatal error. - logger.warning(f"Error processing {str(src)} produced by {self.filepath}: {ex} {str(ex)}") - - # - # Augment kfp_ui_metadata_filename with Elyra-specific information: - # - link to object storage where input and output artifacts are - # stored - ui_metadata_output = output_path / kfp_ui_metadata_filename - try: - # re-load the file - with open(ui_metadata_output, "r") as f: - metadata = json.load(f) - except Exception: - # ignore all errors - metadata = {} - - # Assure the 'output' property exists and is of the correct type - if metadata.get("outputs", None) is None or not isinstance(metadata["outputs"], list): - metadata["outputs"] = [] - - # Define HREF for COS bucket: - # // - bucket_url = urljoin( - urlunparse(self.cos_endpoint), f"{self.cos_bucket}/{self.input_params.get('cos-directory', '')}/" - ) - - # add Elyra metadata to 'outputs' - metadata["outputs"].append( - { - "storage": "inline", - "source": f"## Inputs for {self.filepath}\n" - f"[{self.input_params['cos-dependencies-archive']}]({bucket_url})", - "type": "markdown", - } - ) - - # print the content of the augmented metadata file - logger.debug(f"Output UI metadata: {json.dumps(metadata)}") - - logger.debug(f"Saving UI metadata file as {ui_metadata_output} ...") - - # Save [updated] KFP UI metadata file - with open(ui_metadata_output, "w") as f: - json.dump(metadata, f) - - duration = time.time() - t0 - OpUtil.log_operation_info("metrics and metadata processed", duration) - - def get_object_storage_filename(self, filename: str) -> str: - """Function to pre-pend cloud storage working dir to file name - - :param filename: the local file - :return: the full path of the object storage file - """ - return os.path.join(self.input_params.get("cos-directory", ""), filename) - - def get_file_from_object_storage(self, file_to_get: str) -> None: - """Utility function to get files from an object storage - - :param file_to_get: filename - """ - - object_to_get = self.get_object_storage_filename(file_to_get) - t0 = time.time() - self.cos_client.fget_object(bucket_name=self.cos_bucket, object_name=object_to_get, file_path=file_to_get) - duration = time.time() - t0 - OpUtil.log_operation_info( - f"downloaded {file_to_get} from bucket: {self.cos_bucket}, object: {object_to_get}", duration - ) - - def put_file_to_object_storage(self, file_to_upload: str, object_name: Optional[str] = None) -> None: - """Utility function to put files into an object 
storage - - :param file_to_upload: filename - :param object_name: remote filename (used to rename) - """ - - object_to_upload = object_name - if not object_to_upload: - object_to_upload = file_to_upload - - object_to_upload = self.get_object_storage_filename(object_to_upload) - t0 = time.time() - self.cos_client.fput_object(bucket_name=self.cos_bucket, object_name=object_to_upload, file_path=file_to_upload) - duration = time.time() - t0 - OpUtil.log_operation_info( - f"uploaded {file_to_upload} to bucket: {self.cos_bucket} object: {object_to_upload}", duration - ) - - def has_wildcard(self, filename): - wildcards = ["*", "?"] - return bool(any(c in filename for c in wildcards)) - - def process_output_file(self, output_file): - """Puts the file to object storage. Handles wildcards and directories.""" - - matched_files = [output_file] - if self.has_wildcard(output_file): # explode the wildcarded file - matched_files = glob.glob(output_file) - - for matched_file in matched_files: - if os.path.isdir(matched_file): - for file in os.listdir(matched_file): - self.process_output_file(os.path.join(matched_file, file)) - else: - self.put_file_to_object_storage(matched_file) - - def convert_param_str_to_dict(self, pipeline_parameters: Optional[str] = None) -> Dict[str, Any]: - """Convert INOUT-separated string of pipeline parameters into a dictionary.""" - parameter_dict = {} - if pipeline_parameters: - parameter_list = pipeline_parameters.split(INOUT_SEPARATOR) - for parameter in parameter_list: - param_name, value = parameter.split("=", 1) - if self.parameter_pass_method == "env" and (not value or not isinstance(value, str)): - continue # env vars must be non-empty strings - parameter_dict[param_name] = value - return parameter_dict - - def set_parameters_in_env(self) -> None: - """Make pipeline parameters available as environment variables.""" - for name, value in self.pipeline_param_dict.items(): - if name in os.environ: - continue # avoid overwriting env vars with the same name - os.environ[name] = value - - -class NotebookFileOp(FileOpBase): - """Perform Notebook File Operation""" - - def execute(self) -> None: - """Execute the Notebook and upload results to object storage""" - notebook = os.path.basename(self.filepath) - notebook_name = notebook.replace(".ipynb", "") - notebook_output = f"{notebook_name}-output.ipynb" - notebook_html = f"{notebook_name}.html" - - try: - OpUtil.log_operation_info(f"executing notebook using 'papermill {notebook} {notebook_output}'") - t0 = time.time() - # Include kernel selection in execution time - kernel_name = NotebookFileOp.find_best_kernel(notebook) - - kwargs = {} - if self.parameter_pass_method == "env": - self.set_parameters_in_env() - - import papermill - - papermill.execute_notebook(notebook, notebook_output, kernel_name=kernel_name, **kwargs) - duration = time.time() - t0 - OpUtil.log_operation_info("notebook execution completed", duration) - - NotebookFileOp.convert_notebook_to_html(notebook_output, notebook_html) - self.put_file_to_object_storage(notebook_output, notebook) - self.put_file_to_object_storage(notebook_html) - self.process_outputs() - except Exception as ex: - # log in case of errors - logger.error(f"Unexpected error: {sys.exc_info()[0]}") - - NotebookFileOp.convert_notebook_to_html(notebook_output, notebook_html) - self.put_file_to_object_storage(notebook_output, notebook) - self.put_file_to_object_storage(notebook_html) - raise ex - - @staticmethod - def convert_notebook_to_html(notebook_file: str, html_file: str) -> str: - 
"""Function to convert a Jupyter notebook file (.ipynb) into an html file - - :param notebook_file: object storage client - :param html_file: name of what the html output file should be - :return: html_file: the converted notebook in html format - """ - import nbconvert - import nbformat - - OpUtil.log_operation_info(f"converting from {notebook_file} to {html_file}") - t0 = time.time() - nb = nbformat.read(notebook_file, as_version=4) - html_exporter = nbconvert.HTMLExporter() - data, resources = html_exporter.from_notebook_node(nb) - with open(html_file, "w") as f: - f.write(data) - f.close() - - duration = time.time() - t0 - OpUtil.log_operation_info(f"{notebook_file} converted to {html_file}", duration) - return html_file - - @staticmethod - def find_best_kernel(notebook_file: str) -> str: - """Determines the best kernel to use via the following algorithm: - - 1. Loads notebook and gets kernel_name and kernel_language from NB metadata. - 2. Gets the list of configured kernels using KernelSpecManager. - 3. If notebook kernel_name is in list, use that, else - 4. If not found, load each configured kernel.json file and find a language match. - 5. On first match, log info message regarding the switch and use that kernel. - 6. If no language match is found, revert to notebook kernel and log warning message. - """ - from jupyter_client.kernelspec import KernelSpecManager - import nbformat - - nb = nbformat.read(notebook_file, 4) - - nb_kspec = nb.metadata.kernelspec - nb_kernel_name = nb_kspec.get("name") - nb_kernel_lang = nb_kspec.get("language") - - kernel_specs = KernelSpecManager().find_kernel_specs() - - # see if we have a direct match... - if nb_kernel_name in kernel_specs.keys(): - return nb_kernel_name - - # no match found for kernel, try matching language... - for name, file in kernel_specs.items(): - # load file (JSON) and pick out language, if match, use first found - with open(os.path.join(file, "kernel.json")) as f: - kspec = json.load(f) - if kspec.get("language").lower() == nb_kernel_lang.lower(): - matched_kernel = os.path.basename(file) - logger.info( - f"Matched kernel by language ({nb_kernel_lang}), using kernel " - f"'{matched_kernel}' instead of the missing kernel '{nb_kernel_name}'." - ) - return matched_kernel - - # no match found for language, return notebook kernel and let execution fail - logger.warning( - f"Reverting back to missing notebook kernel '{nb_kernel_name}' since no " - f"language match ({nb_kernel_lang}) was found in current kernel specifications." 
- ) - return nb_kernel_name - - -class PythonFileOp(FileOpBase): - """Perform Python File Operation""" - - def execute(self) -> None: - """Execute the Python script and upload results to object storage""" - python_script = os.path.basename(self.filepath) - python_script_name = python_script.replace(".py", "") - # python_script_output = f"{python_script_name}.log" - - try: - OpUtil.log_operation_info( - f"executing python script using 'python3 {python_script}'" - ) - t0 = time.time() - - run_args = ["python3", python_script] - if self.parameter_pass_method == "env": - self.set_parameters_in_env() - - logger.info("----------------------Python logs start----------------------") - # Removing support for the s3 storage of python script logs - # with open(python_script_output, "w") as log_file: - # process = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - process = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - - for line in iter(process.stdout.readline, b''): - sys.stdout.write(line.decode()) - - process.stdout.close() - return_code = process.wait() - logger.info("----------------------Python logs ends----------------------") - if return_code: - raise subprocess.CalledProcessError(return_code, run_args) - duration = time.time() - t0 - OpUtil.log_operation_info("python script execution completed", duration) - - # self.put_file_to_object_storage(python_script_output, python_script_output) - self.process_outputs() - except Exception as ex: - # log in case of errors - logger.error(f"Unexpected error: {sys.exc_info()[0]}") - logger.error(f"Error details: {ex}") - - # self.put_file_to_object_storage(python_script_output, python_script_output) - raise ex - - -class RFileOp(FileOpBase): - """Perform R File Operation""" - - def execute(self) -> None: - """Execute the R script and upload results to object storage""" - r_script = os.path.basename(self.filepath) - r_script_name = r_script.replace(".r", "") - # r_script_output = f"{r_script_name}.log" - - try: - OpUtil.log_operation_info(f"executing R script using 'Rscript {r_script}'") - t0 = time.time() - - run_args = ["Rscript", r_script] - if self.parameter_pass_method == "env": - self.set_parameters_in_env() - - logger.info("----------------------R script logs start----------------------") - # Removing support for the s3 storage of R script logs - # with open(r_script_output, "w") as log_file: - # process = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - process = subprocess.Popen(run_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - - for line in iter(process.stdout.readline, b''): - sys.stdout.write(line.decode()) - - process.stdout.close() - return_code = process.wait() - logger.info("----------------------R script logs ends----------------------") - if return_code: - raise subprocess.CalledProcessError(return_code, run_args) - - duration = time.time() - t0 - OpUtil.log_operation_info("R script execution completed", duration) - - # self.put_file_to_object_storage(r_script_output, r_script_output) - self.process_outputs() - except Exception as ex: - # log in case of errors - logger.error(f"Unexpected error: {sys.exc_info()[0]}") - logger.error(f"Error details: {ex}") - - # self.put_file_to_object_storage(r_script_output, r_script_output) - raise ex - - -class OpUtil(object): - """Utility functions for preparing file execution.""" - - @classmethod - def package_install(cls, user_volume_path) -> None: - OpUtil.log_operation_info("Installing packages") 
- t0 = time.time() - requirements_file = cls.determine_elyra_requirements() - elyra_packages = cls.package_list_to_dict(requirements_file) - current_packages = cls.package_list_to_dict("requirements-current.txt") - to_install_list = [] - - for package, ver in elyra_packages.items(): - if package in current_packages: - if current_packages[package] is None: - logger.warning( - f"WARNING: Source package '{package}' found already installed as an " - "editable package. This may conflict with the required version: " - f"{ver} . Skipping..." - ) - continue - try: - version.Version(current_packages[package]) - except version.InvalidVersion: # current version is not PEP-440 compliant - logger.warning( - f"WARNING: Source package '{package}' found already installed from " - f"{current_packages[package]}. This may conflict with the required " - f"version: {ver} . Skipping..." - ) - continue - if version.Version(ver) > version.Version(current_packages[package]): - logger.info(f"Updating {package} package from version {current_packages[package]} to {ver}...") - to_install_list.append(f"{package}=={ver}") - elif version.Version(ver) < version.Version(current_packages[package]): - logger.info( - f"Newer {package} package with version {current_packages[package]} " - f"already installed. Skipping..." - ) - else: - logger.info(f"Package not found. Installing {package} package with version {ver}...") - to_install_list.append(f"{package}=={ver}") - - if to_install_list: - if user_volume_path: - to_install_list.insert(0, f"--target={user_volume_path}") - to_install_list.append("--no-cache-dir") - - subprocess.run([sys.executable, "-m", "pip", "install"] + to_install_list, check=True) - - if user_volume_path: - os.environ["PIP_CONFIG_FILE"] = f"{user_volume_path}/pip.conf" - - subprocess.run([sys.executable, "-m", "pip", "freeze"]) - duration = time.time() - t0 - OpUtil.log_operation_info("Packages installed", duration) - - @classmethod - def determine_elyra_requirements(cls) -> Any: - if sys.version_info.major == 3: - if sys.version_info.minor in [8, 9, 10, 11]: - return "requirements-elyra.txt" - logger.error( - f"This version of Python '{sys.version_info.major}.{sys.version_info.minor}' " - f"is not supported for Elyra generic components" - ) - return None - - @classmethod - def package_list_to_dict(cls, filename: str) -> dict: - package_dict = {} - with open(filename) as fh: - for line in fh: - if line[0] != "#": - if " @ " in line: - package_name, package_version = line.strip("\n").split(sep=" @ ") - elif "===" in line: - package_name, package_version = line.strip("\n").split(sep="===") - elif "==" in line: - package_name, package_version = line.strip("\n").split(sep="==") - elif line.startswith("-e ") or line.startswith("--editable "): - package_name = line.strip("\n").replace("-e ", "").replace("--editable ", "") - if "#egg=" in package_name: # editable package from version control system - package_name = package_name.split("=")[-1] - elif "/" in package_name: # editable package from local directory - package_name = os.path.basename(package_name) - package_version = None - else: - # Tolerate other formats but do not add to package list - continue - - package_dict[package_name] = package_version - - return package_dict - - @classmethod - def parse_arguments(cls, args) -> dict: - import argparse - - global pipeline_name, operation_name - - logger.debug("Parsing Arguments.....") - parser = argparse.ArgumentParser() - parser.add_argument( - "-e", "--cos-endpoint", dest="cos-endpoint", help="Cloud object 
storage endpoint", required=True - ) - parser.add_argument( - "-b", "--cos-bucket", dest="cos-bucket", help="Cloud object storage bucket to use", required=True - ) - parser.add_argument( - "-d", - "--cos-directory", - dest="cos-directory", - help="Working directory in cloud object storage bucket to use", - required=True, - ) - parser.add_argument( - "-t", - "--cos-dependencies-archive", - dest="cos-dependencies-archive", - help="Archive containing notebook and dependency artifacts", - required=True, - ) - parser.add_argument("-f", "--file", dest="filepath", help="File to execute", required=True) - parser.add_argument("-o", "--outputs", dest="outputs", help="Files to output to object store", required=False) - parser.add_argument("-i", "--inputs", dest="inputs", help="Files to pull in from parent node", required=False) - parser.add_argument( - "-p", - "--user-volume-path", - dest="user-volume-path", - help="Directory in Volume to install python libraries into", - required=False, - ) - parser.add_argument( - "-n", - "--pipeline-name", - dest="pipeline-name", - help="Pipeline name", - required=True, - ) - parser.add_argument( - "-r", - "--pipeline-parameters", - dest="pipeline_parameters", - help="Pipeline parameters that apply to this node", - required=False, - ) - parser.add_argument( - "-m", - "--parameter-pass-method", - dest="parameter_pass_method", - choices=["env"], - help="The method by which pipeline parameters should be applied to this node.", - required=False, - ) - parsed_args = vars(parser.parse_args(args)) - - # set pipeline name as global - pipeline_name = parsed_args.get("pipeline-name") - # operation/node name is the basename of the non-suffixed filepath, set as global - operation_name = os.path.basename(os.path.splitext(parsed_args.get("filepath"))[0]) - - return parsed_args - - @classmethod - def log_operation_info(cls, action_clause: str, duration_secs: Optional[float] = None) -> None: - """Produces a formatted log INFO message used entirely for support purposes. - - This method is intended to be called for any entries that should be captured across aggregated - log files to identify steps within a given pipeline and each of its operations. As a result, - calls to this method should produce single-line entries in the log (no embedded newlines). - Each entry is prefixed with the pipeline name. - - General logging should NOT use this method but use logger.() statements directly. 
- - :param action_clause: str representing the action that is being logged - :param duration_secs: optional float value representing the duration of the action being logged - """ - global pipeline_name, operation_name - if enable_pipeline_info: - duration_clause = f"({duration_secs:.3f} secs)" if duration_secs else "" - logger.info(f"'{pipeline_name}':'{operation_name}' - {action_clause} {duration_clause}") - - -def main(): - # Configure logger format, level - logging.basicConfig( - format="[%(levelname)1.1s %(asctime)s.%(msecs).03d] %(message)s", datefmt="%H:%M:%S", level=logging.DEBUG - ) - # Setup packages and gather arguments - input_params = OpUtil.parse_arguments(sys.argv[1:]) - OpUtil.log_operation_info("starting operation") - t0 = time.time() - - # Create the appropriate instance, process dependencies and execute the operation - file_op = FileOpBase.get_instance(**input_params) - - file_op.process_dependencies() - - file_op.execute() - - # Process notebook | script metrics and KFP UI metadata - file_op.process_metrics_and_metadata() - - duration = time.time() - t0 - OpUtil.log_operation_info("operation completed", duration) - - -if __name__ == "__main__": - main() \ No newline at end of file From 98ec76cf9d9aed495533dfd027514ed7ef74c5a2 Mon Sep 17 00:00:00 2001 From: William Siqueira Date: Tue, 7 Oct 2025 16:52:58 -0300 Subject: [PATCH 2/6] Updating to Elyra v4.3.0 --- jupyter/datascience/ubi9-python-3.12/pylock.toml | 6 +++--- jupyter/datascience/ubi9-python-3.12/pyproject.toml | 2 +- jupyter/pytorch+llmcompressor/ubi9-python-3.12/pylock.toml | 6 +++--- .../pytorch+llmcompressor/ubi9-python-3.12/pyproject.toml | 2 +- jupyter/pytorch/ubi9-python-3.12/pylock.toml | 6 +++--- jupyter/pytorch/ubi9-python-3.12/pyproject.toml | 2 +- jupyter/rocm/pytorch/ubi9-python-3.12/pylock.toml | 6 +++--- jupyter/rocm/pytorch/ubi9-python-3.12/pyproject.toml | 2 +- jupyter/rocm/tensorflow/ubi9-python-3.12/pylock.toml | 6 +++--- jupyter/rocm/tensorflow/ubi9-python-3.12/pyproject.toml | 2 +- jupyter/tensorflow/ubi9-python-3.12/pylock.toml | 6 +++--- jupyter/tensorflow/ubi9-python-3.12/pyproject.toml | 2 +- jupyter/trustyai/ubi9-python-3.12/pylock.toml | 6 +++--- jupyter/trustyai/ubi9-python-3.12/pyproject.toml | 2 +- runtimes/datascience/ubi9-python-3.12/Dockerfile.cpu | 2 +- runtimes/minimal/ubi9-python-3.12/Dockerfile.cpu | 2 +- .../pytorch+llmcompressor/ubi9-python-3.12/Dockerfile.cuda | 2 +- runtimes/pytorch/ubi9-python-3.12/Dockerfile.cuda | 2 +- runtimes/rocm-pytorch/ubi9-python-3.12/Dockerfile.rocm | 2 +- runtimes/rocm-tensorflow/ubi9-python-3.12/Dockerfile.rocm | 2 +- runtimes/tensorflow/ubi9-python-3.12/Dockerfile.cuda | 2 +- 21 files changed, 35 insertions(+), 35 deletions(-) diff --git a/jupyter/datascience/ubi9-python-3.12/pylock.toml b/jupyter/datascience/ubi9-python-3.12/pylock.toml index bc01fdf55a..9d7457cf2a 100644 --- a/jupyter/datascience/ubi9-python-3.12/pylock.toml +++ b/jupyter/datascience/ubi9-python-3.12/pylock.toml @@ -2449,9 +2449,9 @@ wheels = [{ url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df83 [[packages]] name = "odh-elyra" -version = "4.2.4" -sdist = { url = "https://files.pythonhosted.org/packages/39/58/9a76992bcd402f7eaf9a23fb164d56993db6c079d3fad67dc0a4df799d03/odh_elyra-4.2.4.tar.gz", upload-time = 2025-09-11T18:09:43Z, size = 2155769, hashes = { sha256 = "9563849a41e3d5f45f4923a00d9e6480a9d53787e076d6369248f880795d2130" } } -wheels = [{ url = 
"https://files.pythonhosted.org/packages/f9/48/b852bc3107a6d92c1c6c63b0fa6255897e515df6948778e521f603f59e75/odh_elyra-4.2.4-py3-none-any.whl", upload-time = 2025-09-11T18:09:41Z, size = 4317667, hashes = { sha256 = "227b1a35a3eef8a02409e3aa5081c3322bcd30cd8939b77575dfce397f3fa42b" } }] +version = "4.3.0" +sdist = { url = "https://files.pythonhosted.org/packages/64/60/09bacf2a747e902ecdf72fbcdcd335c9232b455e37c57261e53c56ef472b/odh_elyra-4.3.0.tar.gz", upload-time = 2025-10-07T18:01:00Z, size = 2196737, hashes = { sha256 = "907ff7ec351677bebea262439deee8c3fedcc185fd3f3c4536adb150fb50eaae" } } +wheels = [{ url = "https://files.pythonhosted.org/packages/5b/19/c8db67de0fab5190e0b186f1321f02301d71e5a3cefbd337b0d67c6d676b/odh_elyra-4.3.0-py3-none-any.whl", upload-time = 2025-10-07T18:00:58Z, size = 4352807, hashes = { sha256 = "a5371618994024c2d48b55558342493f84ddf384c3c815c8c086b52aa4b57fda" } }] [[packages]] name = "odh-jupyter-trash-cleanup" diff --git a/jupyter/datascience/ubi9-python-3.12/pyproject.toml b/jupyter/datascience/ubi9-python-3.12/pyproject.toml index 86ada552df..5140d1dca6 100644 --- a/jupyter/datascience/ubi9-python-3.12/pyproject.toml +++ b/jupyter/datascience/ubi9-python-3.12/pyproject.toml @@ -27,7 +27,7 @@ dependencies = [ "mysql-connector-python~=9.4.0", # JupyterLab packages - "odh-elyra==4.2.4", + "odh-elyra==4.3.0", "odh-jupyter-trash-cleanup==0.1.1", "jupyterlab==4.4.7", diff --git a/jupyter/pytorch+llmcompressor/ubi9-python-3.12/pylock.toml b/jupyter/pytorch+llmcompressor/ubi9-python-3.12/pylock.toml index c5299da624..3a6f40f12f 100644 --- a/jupyter/pytorch+llmcompressor/ubi9-python-3.12/pylock.toml +++ b/jupyter/pytorch+llmcompressor/ubi9-python-3.12/pylock.toml @@ -2703,9 +2703,9 @@ wheels = [{ url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df83 [[packages]] name = "odh-elyra" -version = "4.2.4" -sdist = { url = "https://files.pythonhosted.org/packages/39/58/9a76992bcd402f7eaf9a23fb164d56993db6c079d3fad67dc0a4df799d03/odh_elyra-4.2.4.tar.gz", upload-time = 2025-09-11T18:09:43Z, size = 2155769, hashes = { sha256 = "9563849a41e3d5f45f4923a00d9e6480a9d53787e076d6369248f880795d2130" } } -wheels = [{ url = "https://files.pythonhosted.org/packages/f9/48/b852bc3107a6d92c1c6c63b0fa6255897e515df6948778e521f603f59e75/odh_elyra-4.2.4-py3-none-any.whl", upload-time = 2025-09-11T18:09:41Z, size = 4317667, hashes = { sha256 = "227b1a35a3eef8a02409e3aa5081c3322bcd30cd8939b77575dfce397f3fa42b" } }] +version = "4.3.0" +sdist = { url = "https://files.pythonhosted.org/packages/64/60/09bacf2a747e902ecdf72fbcdcd335c9232b455e37c57261e53c56ef472b/odh_elyra-4.3.0.tar.gz", upload-time = 2025-10-07T18:01:00Z, size = 2196737, hashes = { sha256 = "907ff7ec351677bebea262439deee8c3fedcc185fd3f3c4536adb150fb50eaae" } } +wheels = [{ url = "https://files.pythonhosted.org/packages/5b/19/c8db67de0fab5190e0b186f1321f02301d71e5a3cefbd337b0d67c6d676b/odh_elyra-4.3.0-py3-none-any.whl", upload-time = 2025-10-07T18:00:58Z, size = 4352807, hashes = { sha256 = "a5371618994024c2d48b55558342493f84ddf384c3c815c8c086b52aa4b57fda" } }] [[packages]] name = "odh-jupyter-trash-cleanup" diff --git a/jupyter/pytorch+llmcompressor/ubi9-python-3.12/pyproject.toml b/jupyter/pytorch+llmcompressor/ubi9-python-3.12/pyproject.toml index 018ef3541b..33a29165f5 100644 --- a/jupyter/pytorch+llmcompressor/ubi9-python-3.12/pyproject.toml +++ b/jupyter/pytorch+llmcompressor/ubi9-python-3.12/pyproject.toml @@ -43,7 +43,7 @@ dependencies = [ "mysql-connector-python~=9.4.0", # JupyterLab packages - 
"odh-elyra==4.2.4", + "odh-elyra==4.3.0", "odh-jupyter-trash-cleanup==0.1.1", "jupyterlab==4.4.7", diff --git a/jupyter/pytorch/ubi9-python-3.12/pylock.toml b/jupyter/pytorch/ubi9-python-3.12/pylock.toml index ff861d0775..790360773a 100644 --- a/jupyter/pytorch/ubi9-python-3.12/pylock.toml +++ b/jupyter/pytorch/ubi9-python-3.12/pylock.toml @@ -2594,9 +2594,9 @@ wheels = [{ url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df83 [[packages]] name = "odh-elyra" -version = "4.2.4" -sdist = { url = "https://files.pythonhosted.org/packages/39/58/9a76992bcd402f7eaf9a23fb164d56993db6c079d3fad67dc0a4df799d03/odh_elyra-4.2.4.tar.gz", upload-time = 2025-09-11T18:09:43Z, size = 2155769, hashes = { sha256 = "9563849a41e3d5f45f4923a00d9e6480a9d53787e076d6369248f880795d2130" } } -wheels = [{ url = "https://files.pythonhosted.org/packages/f9/48/b852bc3107a6d92c1c6c63b0fa6255897e515df6948778e521f603f59e75/odh_elyra-4.2.4-py3-none-any.whl", upload-time = 2025-09-11T18:09:41Z, size = 4317667, hashes = { sha256 = "227b1a35a3eef8a02409e3aa5081c3322bcd30cd8939b77575dfce397f3fa42b" } }] +version = "4.3.0" +sdist = { url = "https://files.pythonhosted.org/packages/64/60/09bacf2a747e902ecdf72fbcdcd335c9232b455e37c57261e53c56ef472b/odh_elyra-4.3.0.tar.gz", upload-time = 2025-10-07T18:01:00Z, size = 2196737, hashes = { sha256 = "907ff7ec351677bebea262439deee8c3fedcc185fd3f3c4536adb150fb50eaae" } } +wheels = [{ url = "https://files.pythonhosted.org/packages/5b/19/c8db67de0fab5190e0b186f1321f02301d71e5a3cefbd337b0d67c6d676b/odh_elyra-4.3.0-py3-none-any.whl", upload-time = 2025-10-07T18:00:58Z, size = 4352807, hashes = { sha256 = "a5371618994024c2d48b55558342493f84ddf384c3c815c8c086b52aa4b57fda" } }] [[packages]] name = "odh-jupyter-trash-cleanup" diff --git a/jupyter/pytorch/ubi9-python-3.12/pyproject.toml b/jupyter/pytorch/ubi9-python-3.12/pyproject.toml index 335fa55bb3..f2a759f14a 100644 --- a/jupyter/pytorch/ubi9-python-3.12/pyproject.toml +++ b/jupyter/pytorch/ubi9-python-3.12/pyproject.toml @@ -32,7 +32,7 @@ dependencies = [ "mysql-connector-python~=9.4.0", # JupyterLab packages - "odh-elyra==4.2.4", + "odh-elyra==4.3.0", "odh-jupyter-trash-cleanup==0.1.1", "jupyterlab==4.4.7", diff --git a/jupyter/rocm/pytorch/ubi9-python-3.12/pylock.toml b/jupyter/rocm/pytorch/ubi9-python-3.12/pylock.toml index 85d8dc1899..47efb465e0 100644 --- a/jupyter/rocm/pytorch/ubi9-python-3.12/pylock.toml +++ b/jupyter/rocm/pytorch/ubi9-python-3.12/pylock.toml @@ -2456,9 +2456,9 @@ wheels = [{ url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df83 [[packages]] name = "odh-elyra" -version = "4.2.4" -sdist = { url = "https://files.pythonhosted.org/packages/39/58/9a76992bcd402f7eaf9a23fb164d56993db6c079d3fad67dc0a4df799d03/odh_elyra-4.2.4.tar.gz", upload-time = 2025-09-11T18:09:43Z, size = 2155769, hashes = { sha256 = "9563849a41e3d5f45f4923a00d9e6480a9d53787e076d6369248f880795d2130" } } -wheels = [{ url = "https://files.pythonhosted.org/packages/f9/48/b852bc3107a6d92c1c6c63b0fa6255897e515df6948778e521f603f59e75/odh_elyra-4.2.4-py3-none-any.whl", upload-time = 2025-09-11T18:09:41Z, size = 4317667, hashes = { sha256 = "227b1a35a3eef8a02409e3aa5081c3322bcd30cd8939b77575dfce397f3fa42b" } }] +version = "4.3.0" +sdist = { url = "https://files.pythonhosted.org/packages/64/60/09bacf2a747e902ecdf72fbcdcd335c9232b455e37c57261e53c56ef472b/odh_elyra-4.3.0.tar.gz", upload-time = 2025-10-07T18:01:00Z, size = 2196737, hashes = { sha256 = "907ff7ec351677bebea262439deee8c3fedcc185fd3f3c4536adb150fb50eaae" } } +wheels = [{ url = 
"https://files.pythonhosted.org/packages/5b/19/c8db67de0fab5190e0b186f1321f02301d71e5a3cefbd337b0d67c6d676b/odh_elyra-4.3.0-py3-none-any.whl", upload-time = 2025-10-07T18:00:58Z, size = 4352807, hashes = { sha256 = "a5371618994024c2d48b55558342493f84ddf384c3c815c8c086b52aa4b57fda" } }] [[packages]] name = "odh-jupyter-trash-cleanup" diff --git a/jupyter/rocm/pytorch/ubi9-python-3.12/pyproject.toml b/jupyter/rocm/pytorch/ubi9-python-3.12/pyproject.toml index 6634e12808..c5f1e3ef18 100644 --- a/jupyter/rocm/pytorch/ubi9-python-3.12/pyproject.toml +++ b/jupyter/rocm/pytorch/ubi9-python-3.12/pyproject.toml @@ -33,7 +33,7 @@ dependencies = [ "mysql-connector-python~=9.4.0", # JupyterLab packages - "odh-elyra==4.2.4", + "odh-elyra==4.3.0", "odh-jupyter-trash-cleanup==0.1.1", "jupyterlab==4.4.7", diff --git a/jupyter/rocm/tensorflow/ubi9-python-3.12/pylock.toml b/jupyter/rocm/tensorflow/ubi9-python-3.12/pylock.toml index 9df37208cc..6ad02ac157 100644 --- a/jupyter/rocm/tensorflow/ubi9-python-3.12/pylock.toml +++ b/jupyter/rocm/tensorflow/ubi9-python-3.12/pylock.toml @@ -2508,9 +2508,9 @@ wheels = [{ url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df83 [[packages]] name = "odh-elyra" -version = "4.2.4" -sdist = { url = "https://files.pythonhosted.org/packages/39/58/9a76992bcd402f7eaf9a23fb164d56993db6c079d3fad67dc0a4df799d03/odh_elyra-4.2.4.tar.gz", upload-time = 2025-09-11T18:09:43Z, size = 2155769, hashes = { sha256 = "9563849a41e3d5f45f4923a00d9e6480a9d53787e076d6369248f880795d2130" } } -wheels = [{ url = "https://files.pythonhosted.org/packages/f9/48/b852bc3107a6d92c1c6c63b0fa6255897e515df6948778e521f603f59e75/odh_elyra-4.2.4-py3-none-any.whl", upload-time = 2025-09-11T18:09:41Z, size = 4317667, hashes = { sha256 = "227b1a35a3eef8a02409e3aa5081c3322bcd30cd8939b77575dfce397f3fa42b" } }] +version = "4.3.0" +sdist = { url = "https://files.pythonhosted.org/packages/64/60/09bacf2a747e902ecdf72fbcdcd335c9232b455e37c57261e53c56ef472b/odh_elyra-4.3.0.tar.gz", upload-time = 2025-10-07T18:01:00Z, size = 2196737, hashes = { sha256 = "907ff7ec351677bebea262439deee8c3fedcc185fd3f3c4536adb150fb50eaae" } } +wheels = [{ url = "https://files.pythonhosted.org/packages/5b/19/c8db67de0fab5190e0b186f1321f02301d71e5a3cefbd337b0d67c6d676b/odh_elyra-4.3.0-py3-none-any.whl", upload-time = 2025-10-07T18:00:58Z, size = 4352807, hashes = { sha256 = "a5371618994024c2d48b55558342493f84ddf384c3c815c8c086b52aa4b57fda" } }] [[packages]] name = "odh-jupyter-trash-cleanup" diff --git a/jupyter/rocm/tensorflow/ubi9-python-3.12/pyproject.toml b/jupyter/rocm/tensorflow/ubi9-python-3.12/pyproject.toml index e6995c7d47..6b792ec466 100644 --- a/jupyter/rocm/tensorflow/ubi9-python-3.12/pyproject.toml +++ b/jupyter/rocm/tensorflow/ubi9-python-3.12/pyproject.toml @@ -34,7 +34,7 @@ dependencies = [ "mysql-connector-python~=9.4.0", # JupyterLab packages - "odh-elyra==4.2.4", + "odh-elyra==4.3.0", "odh-jupyter-trash-cleanup==0.1.1", "jupyterlab==4.4.7", diff --git a/jupyter/tensorflow/ubi9-python-3.12/pylock.toml b/jupyter/tensorflow/ubi9-python-3.12/pylock.toml index 6ee29420dc..d86d20098b 100644 --- a/jupyter/tensorflow/ubi9-python-3.12/pylock.toml +++ b/jupyter/tensorflow/ubi9-python-3.12/pylock.toml @@ -2619,9 +2619,9 @@ wheels = [{ url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df83 [[packages]] name = "odh-elyra" -version = "4.2.4" -sdist = { url = "https://files.pythonhosted.org/packages/39/58/9a76992bcd402f7eaf9a23fb164d56993db6c079d3fad67dc0a4df799d03/odh_elyra-4.2.4.tar.gz", upload-time = 
2025-09-11T18:09:43Z, size = 2155769, hashes = { sha256 = "9563849a41e3d5f45f4923a00d9e6480a9d53787e076d6369248f880795d2130" } } -wheels = [{ url = "https://files.pythonhosted.org/packages/f9/48/b852bc3107a6d92c1c6c63b0fa6255897e515df6948778e521f603f59e75/odh_elyra-4.2.4-py3-none-any.whl", upload-time = 2025-09-11T18:09:41Z, size = 4317667, hashes = { sha256 = "227b1a35a3eef8a02409e3aa5081c3322bcd30cd8939b77575dfce397f3fa42b" } }] +version = "4.3.0" +sdist = { url = "https://files.pythonhosted.org/packages/64/60/09bacf2a747e902ecdf72fbcdcd335c9232b455e37c57261e53c56ef472b/odh_elyra-4.3.0.tar.gz", upload-time = 2025-10-07T18:01:00Z, size = 2196737, hashes = { sha256 = "907ff7ec351677bebea262439deee8c3fedcc185fd3f3c4536adb150fb50eaae" } } +wheels = [{ url = "https://files.pythonhosted.org/packages/5b/19/c8db67de0fab5190e0b186f1321f02301d71e5a3cefbd337b0d67c6d676b/odh_elyra-4.3.0-py3-none-any.whl", upload-time = 2025-10-07T18:00:58Z, size = 4352807, hashes = { sha256 = "a5371618994024c2d48b55558342493f84ddf384c3c815c8c086b52aa4b57fda" } }] [[packages]] name = "odh-jupyter-trash-cleanup" diff --git a/jupyter/tensorflow/ubi9-python-3.12/pyproject.toml b/jupyter/tensorflow/ubi9-python-3.12/pyproject.toml index afba6abf18..007dbaba18 100644 --- a/jupyter/tensorflow/ubi9-python-3.12/pyproject.toml +++ b/jupyter/tensorflow/ubi9-python-3.12/pyproject.toml @@ -32,7 +32,7 @@ dependencies = [ "mysql-connector-python~=9.4.0", # JupyterLab packages - "odh-elyra==4.2.4", + "odh-elyra==4.3.0", "odh-jupyter-trash-cleanup==0.1.1", "jupyterlab==4.4.7", diff --git a/jupyter/trustyai/ubi9-python-3.12/pylock.toml b/jupyter/trustyai/ubi9-python-3.12/pylock.toml index da6d81fa9b..898a13830f 100644 --- a/jupyter/trustyai/ubi9-python-3.12/pylock.toml +++ b/jupyter/trustyai/ubi9-python-3.12/pylock.toml @@ -2319,9 +2319,9 @@ wheels = [{ url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df83 [[packages]] name = "odh-elyra" -version = "4.2.4" -sdist = { url = "https://files.pythonhosted.org/packages/39/58/9a76992bcd402f7eaf9a23fb164d56993db6c079d3fad67dc0a4df799d03/odh_elyra-4.2.4.tar.gz", upload-time = 2025-09-11T18:09:43Z, size = 2155769, hashes = { sha256 = "9563849a41e3d5f45f4923a00d9e6480a9d53787e076d6369248f880795d2130" } } -wheels = [{ url = "https://files.pythonhosted.org/packages/f9/48/b852bc3107a6d92c1c6c63b0fa6255897e515df6948778e521f603f59e75/odh_elyra-4.2.4-py3-none-any.whl", upload-time = 2025-09-11T18:09:41Z, size = 4317667, hashes = { sha256 = "227b1a35a3eef8a02409e3aa5081c3322bcd30cd8939b77575dfce397f3fa42b" } }] +version = "4.3.0" +sdist = { url = "https://files.pythonhosted.org/packages/64/60/09bacf2a747e902ecdf72fbcdcd335c9232b455e37c57261e53c56ef472b/odh_elyra-4.3.0.tar.gz", upload-time = 2025-10-07T18:01:00Z, size = 2196737, hashes = { sha256 = "907ff7ec351677bebea262439deee8c3fedcc185fd3f3c4536adb150fb50eaae" } } +wheels = [{ url = "https://files.pythonhosted.org/packages/5b/19/c8db67de0fab5190e0b186f1321f02301d71e5a3cefbd337b0d67c6d676b/odh_elyra-4.3.0-py3-none-any.whl", upload-time = 2025-10-07T18:00:58Z, size = 4352807, hashes = { sha256 = "a5371618994024c2d48b55558342493f84ddf384c3c815c8c086b52aa4b57fda" } }] [[packages]] name = "odh-jupyter-trash-cleanup" diff --git a/jupyter/trustyai/ubi9-python-3.12/pyproject.toml b/jupyter/trustyai/ubi9-python-3.12/pyproject.toml index 32d687d869..27a65cb76c 100644 --- a/jupyter/trustyai/ubi9-python-3.12/pyproject.toml +++ b/jupyter/trustyai/ubi9-python-3.12/pyproject.toml @@ -37,7 +37,7 @@ dependencies = [ "mysql-connector-python~=9.4.0", # 
JupyterLab packages - "odh-elyra==4.2.4", + "odh-elyra==4.3.0", "odh-jupyter-trash-cleanup==0.1.1", "jupyterlab==4.4.7", diff --git a/runtimes/datascience/ubi9-python-3.12/Dockerfile.cpu b/runtimes/datascience/ubi9-python-3.12/Dockerfile.cpu index d2ff30e3d9..6ec8d50b33 100644 --- a/runtimes/datascience/ubi9-python-3.12/Dockerfile.cpu +++ b/runtimes/datascience/ubi9-python-3.12/Dockerfile.cpu @@ -331,7 +331,7 @@ COPY ${DATASCIENCE_SOURCE_CODE}/pylock.toml ./ # Copy Elyra dependencies for air-gapped enviroment COPY ${DATASCIENCE_SOURCE_CODE}/utils ./utils/ # Download Elyra boostrapper.py -RUN curl -fL https://raw.githubusercontent.com/opendatahub-io/elyra/refs/heads/main/elyra/kfp/bootstrapper.py \ +RUN curl -fL https://raw.githubusercontent.com/opendatahub-io/elyra/refs/tags/v4.3.0/elyra/kfp/bootstrapper.py \ -o ./utils/bootstrapper.py RUN --mount=type=cache,target=/root/.cache/pip \ diff --git a/runtimes/minimal/ubi9-python-3.12/Dockerfile.cpu b/runtimes/minimal/ubi9-python-3.12/Dockerfile.cpu index 4b9ede4d11..60767c34a8 100644 --- a/runtimes/minimal/ubi9-python-3.12/Dockerfile.cpu +++ b/runtimes/minimal/ubi9-python-3.12/Dockerfile.cpu @@ -74,7 +74,7 @@ COPY ${MINIMAL_SOURCE_CODE}/pylock.toml ./ # Copy Elyra dependencies for air-gapped enviroment COPY ${MINIMAL_SOURCE_CODE}/utils ./utils/ # Download Elyra boostrapper.py -RUN curl -fL https://raw.githubusercontent.com/opendatahub-io/elyra/refs/heads/main/elyra/kfp/bootstrapper.py \ +RUN curl -fL https://raw.githubusercontent.com/opendatahub-io/elyra/refs/tags/v4.3.0/elyra/kfp/bootstrapper.py \ -o ./utils/bootstrapper.py RUN echo "Installing softwares and packages" && \ diff --git a/runtimes/pytorch+llmcompressor/ubi9-python-3.12/Dockerfile.cuda b/runtimes/pytorch+llmcompressor/ubi9-python-3.12/Dockerfile.cuda index 035980a4aa..6b0bdd4970 100644 --- a/runtimes/pytorch+llmcompressor/ubi9-python-3.12/Dockerfile.cuda +++ b/runtimes/pytorch+llmcompressor/ubi9-python-3.12/Dockerfile.cuda @@ -69,7 +69,7 @@ COPY ${PYTORCH_SOURCE_CODE}/pylock.toml ./ # Copy Elyra dependencies for air-gapped enviroment COPY ${PYTORCH_SOURCE_CODE}/utils ./utils/ # Download Elyra boostrapper.py -RUN curl -fL https://raw.githubusercontent.com/opendatahub-io/elyra/refs/heads/main/elyra/kfp/bootstrapper.py \ +RUN curl -fL https://raw.githubusercontent.com/opendatahub-io/elyra/refs/tags/v4.3.0/elyra/kfp/bootstrapper.py \ -o ./utils/bootstrapper.py RUN echo "Installing softwares and packages" && \ diff --git a/runtimes/pytorch/ubi9-python-3.12/Dockerfile.cuda b/runtimes/pytorch/ubi9-python-3.12/Dockerfile.cuda index 8a00819859..b9f3c2ba85 100644 --- a/runtimes/pytorch/ubi9-python-3.12/Dockerfile.cuda +++ b/runtimes/pytorch/ubi9-python-3.12/Dockerfile.cuda @@ -69,7 +69,7 @@ COPY ${PYTORCH_SOURCE_CODE}/pylock.toml ./ # Copy Elyra dependencies for air-gapped enviroment COPY ${PYTORCH_SOURCE_CODE}/utils ./utils/ # Download Elyra boostrapper.py -RUN curl -fL https://raw.githubusercontent.com/opendatahub-io/elyra/refs/heads/main/elyra/kfp/bootstrapper.py \ +RUN curl -fL https://raw.githubusercontent.com/opendatahub-io/elyra/refs/tags/v4.3.0/elyra/kfp/bootstrapper.py \ -o ./utils/bootstrapper.py RUN echo "Installing softwares and packages" && \ diff --git a/runtimes/rocm-pytorch/ubi9-python-3.12/Dockerfile.rocm b/runtimes/rocm-pytorch/ubi9-python-3.12/Dockerfile.rocm index c1ace8a85d..77a8daef97 100644 --- a/runtimes/rocm-pytorch/ubi9-python-3.12/Dockerfile.rocm +++ b/runtimes/rocm-pytorch/ubi9-python-3.12/Dockerfile.rocm @@ -70,7 +70,7 @@ COPY ${PYTORCH_SOURCE_CODE}/utils 
./utils/ COPY ${PYTORCH_SOURCE_CODE}/de-vendor-torch.sh ./ # Download Elyra boostrapper.py -RUN curl -fL https://raw.githubusercontent.com/opendatahub-io/elyra/refs/heads/main/elyra/kfp/bootstrapper.py \ +RUN curl -fL https://raw.githubusercontent.com/opendatahub-io/elyra/refs/tags/v4.3.0/elyra/kfp/bootstrapper.py \ -o ./utils/bootstrapper.py RUN echo "Installing softwares and packages" && \ diff --git a/runtimes/rocm-tensorflow/ubi9-python-3.12/Dockerfile.rocm b/runtimes/rocm-tensorflow/ubi9-python-3.12/Dockerfile.rocm index 587add0b5d..ebda0ed77f 100644 --- a/runtimes/rocm-tensorflow/ubi9-python-3.12/Dockerfile.rocm +++ b/runtimes/rocm-tensorflow/ubi9-python-3.12/Dockerfile.rocm @@ -68,7 +68,7 @@ COPY ${TENSORFLOW_SOURCE_CODE}/pylock.toml ./ # Copy Elyra dependencies for air-gapped enviroment COPY ${TENSORFLOW_SOURCE_CODE}/utils ./utils/ # Download Elyra boostrapper.py -RUN curl -fL https://raw.githubusercontent.com/opendatahub-io/elyra/refs/heads/main/elyra/kfp/bootstrapper.py \ +RUN curl -fL https://raw.githubusercontent.com/opendatahub-io/elyra/refs/tags/v4.3.0/elyra/kfp/bootstrapper.py \ -o ./utils/bootstrapper.py RUN echo "Installing softwares and packages" && \ diff --git a/runtimes/tensorflow/ubi9-python-3.12/Dockerfile.cuda b/runtimes/tensorflow/ubi9-python-3.12/Dockerfile.cuda index 31044dd6a9..ff73b61f69 100644 --- a/runtimes/tensorflow/ubi9-python-3.12/Dockerfile.cuda +++ b/runtimes/tensorflow/ubi9-python-3.12/Dockerfile.cuda @@ -72,7 +72,7 @@ COPY ${TENSORFLOW_SOURCE_CODE}/pylock.toml ./ # Copy Elyra dependencies for air-gapped enviroment COPY ${TENSORFLOW_SOURCE_CODE}/utils ./utils/ # Download Elyra boostrapper.py -RUN curl -fL https://raw.githubusercontent.com/opendatahub-io/elyra/refs/heads/main/elyra/kfp/bootstrapper.py \ +RUN curl -fL https://raw.githubusercontent.com/opendatahub-io/elyra/refs/tags/v4.3.0/elyra/kfp/bootstrapper.py \ -o ./utils/bootstrapper.py RUN echo "Installing softwares and packages" && \ From 4f5acb3079f555f610ad2acec92a478267868526 Mon Sep 17 00:00:00 2001 From: William Siqueira Date: Thu, 9 Oct 2025 09:56:26 -0300 Subject: [PATCH 3/6] Copying bootstrapper.py from local python package --- jupyter/datascience/ubi9-python-3.12/setup-elyra.sh | 5 ++++- runtimes/datascience/ubi9-python-3.12/Dockerfile.cpu | 3 --- .../pytorch+llmcompressor/ubi9-python-3.12/Dockerfile.cuda | 3 --- runtimes/pytorch/ubi9-python-3.12/Dockerfile.cuda | 3 --- runtimes/rocm-pytorch/ubi9-python-3.12/Dockerfile.rocm | 4 ---- runtimes/rocm-tensorflow/ubi9-python-3.12/Dockerfile.rocm | 3 --- runtimes/tensorflow/ubi9-python-3.12/Dockerfile.cuda | 3 --- 7 files changed, 4 insertions(+), 20 deletions(-) diff --git a/jupyter/datascience/ubi9-python-3.12/setup-elyra.sh b/jupyter/datascience/ubi9-python-3.12/setup-elyra.sh index c1c0ee22b6..e762771ab0 100644 --- a/jupyter/datascience/ubi9-python-3.12/setup-elyra.sh +++ b/jupyter/datascience/ubi9-python-3.12/setup-elyra.sh @@ -1,6 +1,9 @@ #!/bin/bash set -x +# By copying this we must make sure that ELYRA_INSTALL_PACKAGES=false +cp /opt/app-root/lib/python3.12/site-packages/elyra/kfp/bootstrapper.py /opt/app-root/bin/utils/ + # Set the elyra config on the right path jupyter elyra --generate-config cp /opt/app-root/bin/utils/jupyter_elyra_config.py /opt/app-root/src/.jupyter/ @@ -24,4 +27,4 @@ export KF_PIPELINES_SSL_SA_CERTS="/var/run/secrets/kubernetes.io/serviceaccount/ export KF_PIPELINES_SA_TOKEN_ENV="/var/run/secrets/kubernetes.io/serviceaccount/token" export 
KF_PIPELINES_SA_TOKEN_PATH="/var/run/secrets/kubernetes.io/serviceaccount/token" export ELYRA_INSTALL_PACKAGES="false" -export ELYRA_GENERIC_NODES_ENABLE_SCRIPT_OUTPUT_TO_S3="false" \ No newline at end of file +export ELYRA_GENERIC_NODES_ENABLE_SCRIPT_OUTPUT_TO_S3="false" diff --git a/runtimes/datascience/ubi9-python-3.12/Dockerfile.cpu b/runtimes/datascience/ubi9-python-3.12/Dockerfile.cpu index 6ec8d50b33..22713dff80 100644 --- a/runtimes/datascience/ubi9-python-3.12/Dockerfile.cpu +++ b/runtimes/datascience/ubi9-python-3.12/Dockerfile.cpu @@ -330,9 +330,6 @@ fi COPY ${DATASCIENCE_SOURCE_CODE}/pylock.toml ./ # Copy Elyra dependencies for air-gapped enviroment COPY ${DATASCIENCE_SOURCE_CODE}/utils ./utils/ -# Download Elyra boostrapper.py -RUN curl -fL https://raw.githubusercontent.com/opendatahub-io/elyra/refs/tags/v4.3.0/elyra/kfp/bootstrapper.py \ - -o ./utils/bootstrapper.py RUN --mount=type=cache,target=/root/.cache/pip \ echo "Installing softwares and packages" && \ diff --git a/runtimes/pytorch+llmcompressor/ubi9-python-3.12/Dockerfile.cuda b/runtimes/pytorch+llmcompressor/ubi9-python-3.12/Dockerfile.cuda index 6b0bdd4970..55a2a195c9 100644 --- a/runtimes/pytorch+llmcompressor/ubi9-python-3.12/Dockerfile.cuda +++ b/runtimes/pytorch+llmcompressor/ubi9-python-3.12/Dockerfile.cuda @@ -68,9 +68,6 @@ WORKDIR /opt/app-root/bin COPY ${PYTORCH_SOURCE_CODE}/pylock.toml ./ # Copy Elyra dependencies for air-gapped enviroment COPY ${PYTORCH_SOURCE_CODE}/utils ./utils/ -# Download Elyra boostrapper.py -RUN curl -fL https://raw.githubusercontent.com/opendatahub-io/elyra/refs/tags/v4.3.0/elyra/kfp/bootstrapper.py \ - -o ./utils/bootstrapper.py RUN echo "Installing softwares and packages" && \ # This may have to download and compile some dependencies, and as we don't lock requirements from `build-system.requires`, diff --git a/runtimes/pytorch/ubi9-python-3.12/Dockerfile.cuda b/runtimes/pytorch/ubi9-python-3.12/Dockerfile.cuda index b9f3c2ba85..5a79faea93 100644 --- a/runtimes/pytorch/ubi9-python-3.12/Dockerfile.cuda +++ b/runtimes/pytorch/ubi9-python-3.12/Dockerfile.cuda @@ -68,9 +68,6 @@ WORKDIR /opt/app-root/bin COPY ${PYTORCH_SOURCE_CODE}/pylock.toml ./ # Copy Elyra dependencies for air-gapped enviroment COPY ${PYTORCH_SOURCE_CODE}/utils ./utils/ -# Download Elyra boostrapper.py -RUN curl -fL https://raw.githubusercontent.com/opendatahub-io/elyra/refs/tags/v4.3.0/elyra/kfp/bootstrapper.py \ - -o ./utils/bootstrapper.py RUN echo "Installing softwares and packages" && \ # This may have to download and compile some dependencies, and as we don't lock requirements from `build-system.requires`, diff --git a/runtimes/rocm-pytorch/ubi9-python-3.12/Dockerfile.rocm b/runtimes/rocm-pytorch/ubi9-python-3.12/Dockerfile.rocm index 77a8daef97..d14a8b87cc 100644 --- a/runtimes/rocm-pytorch/ubi9-python-3.12/Dockerfile.rocm +++ b/runtimes/rocm-pytorch/ubi9-python-3.12/Dockerfile.rocm @@ -69,10 +69,6 @@ COPY ${PYTORCH_SOURCE_CODE}/utils ./utils/ # Copy utility script COPY ${PYTORCH_SOURCE_CODE}/de-vendor-torch.sh ./ -# Download Elyra boostrapper.py -RUN curl -fL https://raw.githubusercontent.com/opendatahub-io/elyra/refs/tags/v4.3.0/elyra/kfp/bootstrapper.py \ - -o ./utils/bootstrapper.py - RUN echo "Installing softwares and packages" && \ # This may have to download and compile some dependencies, and as we don't lock requirements from `build-system.requires`, # we often don't know the correct hashes and `--require-hashes` would therefore fail on non amd64, where building is common. 
diff --git a/runtimes/rocm-tensorflow/ubi9-python-3.12/Dockerfile.rocm b/runtimes/rocm-tensorflow/ubi9-python-3.12/Dockerfile.rocm index ebda0ed77f..b8a464df23 100644 --- a/runtimes/rocm-tensorflow/ubi9-python-3.12/Dockerfile.rocm +++ b/runtimes/rocm-tensorflow/ubi9-python-3.12/Dockerfile.rocm @@ -67,9 +67,6 @@ WORKDIR /opt/app-root/bin COPY ${TENSORFLOW_SOURCE_CODE}/pylock.toml ./ # Copy Elyra dependencies for air-gapped enviroment COPY ${TENSORFLOW_SOURCE_CODE}/utils ./utils/ -# Download Elyra boostrapper.py -RUN curl -fL https://raw.githubusercontent.com/opendatahub-io/elyra/refs/tags/v4.3.0/elyra/kfp/bootstrapper.py \ - -o ./utils/bootstrapper.py RUN echo "Installing softwares and packages" && \ # This may have to download and compile some dependencies, and as we don't lock requirements from `build-system.requires`, diff --git a/runtimes/tensorflow/ubi9-python-3.12/Dockerfile.cuda b/runtimes/tensorflow/ubi9-python-3.12/Dockerfile.cuda index ff73b61f69..c6663ef1b0 100644 --- a/runtimes/tensorflow/ubi9-python-3.12/Dockerfile.cuda +++ b/runtimes/tensorflow/ubi9-python-3.12/Dockerfile.cuda @@ -71,9 +71,6 @@ WORKDIR /opt/app-root/bin COPY ${TENSORFLOW_SOURCE_CODE}/pylock.toml ./ # Copy Elyra dependencies for air-gapped enviroment COPY ${TENSORFLOW_SOURCE_CODE}/utils ./utils/ -# Download Elyra boostrapper.py -RUN curl -fL https://raw.githubusercontent.com/opendatahub-io/elyra/refs/tags/v4.3.0/elyra/kfp/bootstrapper.py \ - -o ./utils/bootstrapper.py RUN echo "Installing softwares and packages" && \ # This may have to download and compile some dependencies, and as we don't lock requirements from `build-system.requires`, From d51378c7a7694ba36bfe9b1c8f1f1227d122bef6 Mon Sep 17 00:00:00 2001 From: William Siqueira Date: Thu, 9 Oct 2025 13:28:59 -0300 Subject: [PATCH 4/6] Updating ODH Elyra to 4.3.1 to include pipeline rename fix --- jupyter/datascience/ubi9-python-3.12/pylock.toml | 6 +++--- jupyter/datascience/ubi9-python-3.12/pyproject.toml | 2 +- jupyter/pytorch+llmcompressor/ubi9-python-3.12/pylock.toml | 6 +++--- .../pytorch+llmcompressor/ubi9-python-3.12/pyproject.toml | 2 +- jupyter/pytorch/ubi9-python-3.12/pylock.toml | 6 +++--- jupyter/pytorch/ubi9-python-3.12/pyproject.toml | 2 +- jupyter/rocm/pytorch/ubi9-python-3.12/pylock.toml | 6 +++--- jupyter/rocm/pytorch/ubi9-python-3.12/pyproject.toml | 2 +- jupyter/rocm/tensorflow/ubi9-python-3.12/pylock.toml | 6 +++--- jupyter/rocm/tensorflow/ubi9-python-3.12/pyproject.toml | 2 +- jupyter/tensorflow/ubi9-python-3.12/pylock.toml | 6 +++--- jupyter/tensorflow/ubi9-python-3.12/pyproject.toml | 2 +- jupyter/trustyai/ubi9-python-3.12/pylock.toml | 6 +++--- jupyter/trustyai/ubi9-python-3.12/pyproject.toml | 2 +- 14 files changed, 28 insertions(+), 28 deletions(-) diff --git a/jupyter/datascience/ubi9-python-3.12/pylock.toml b/jupyter/datascience/ubi9-python-3.12/pylock.toml index 9d7457cf2a..d16457180e 100644 --- a/jupyter/datascience/ubi9-python-3.12/pylock.toml +++ b/jupyter/datascience/ubi9-python-3.12/pylock.toml @@ -2449,9 +2449,9 @@ wheels = [{ url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df83 [[packages]] name = "odh-elyra" -version = "4.3.0" -sdist = { url = "https://files.pythonhosted.org/packages/64/60/09bacf2a747e902ecdf72fbcdcd335c9232b455e37c57261e53c56ef472b/odh_elyra-4.3.0.tar.gz", upload-time = 2025-10-07T18:01:00Z, size = 2196737, hashes = { sha256 = "907ff7ec351677bebea262439deee8c3fedcc185fd3f3c4536adb150fb50eaae" } } -wheels = [{ url = 
"https://files.pythonhosted.org/packages/5b/19/c8db67de0fab5190e0b186f1321f02301d71e5a3cefbd337b0d67c6d676b/odh_elyra-4.3.0-py3-none-any.whl", upload-time = 2025-10-07T18:00:58Z, size = 4352807, hashes = { sha256 = "a5371618994024c2d48b55558342493f84ddf384c3c815c8c086b52aa4b57fda" } }] +version = "4.3.1" +sdist = { url = "https://files.pythonhosted.org/packages/f7/f4/6be53ca16125e7d3ca6cb86aa8f001ebe200977e429bedb4b6467e692328/odh_elyra-4.3.1.tar.gz", upload-time = 2025-10-09T14:53:22Z, size = 2196705, hashes = { sha256 = "522c85c647d97d3a5317389dc106452fdebf48f95ecbdea3db0aef1b43192475" } } +wheels = [{ url = "https://files.pythonhosted.org/packages/b3/7a/009a5eb3e872330c22155928ffd3dcb8f139385b7de2a7540cad714be9bd/odh_elyra-4.3.1-py3-none-any.whl", upload-time = 2025-10-09T14:53:20Z, size = 4352762, hashes = { sha256 = "ae1008e0329e14d45a4c31b5436b00e732dcaa201cae15adb7f502249bf9d7a8" } }] [[packages]] name = "odh-jupyter-trash-cleanup" diff --git a/jupyter/datascience/ubi9-python-3.12/pyproject.toml b/jupyter/datascience/ubi9-python-3.12/pyproject.toml index 5140d1dca6..2125fefab9 100644 --- a/jupyter/datascience/ubi9-python-3.12/pyproject.toml +++ b/jupyter/datascience/ubi9-python-3.12/pyproject.toml @@ -27,7 +27,7 @@ dependencies = [ "mysql-connector-python~=9.4.0", # JupyterLab packages - "odh-elyra==4.3.0", + "odh-elyra==4.3.1", "odh-jupyter-trash-cleanup==0.1.1", "jupyterlab==4.4.7", diff --git a/jupyter/pytorch+llmcompressor/ubi9-python-3.12/pylock.toml b/jupyter/pytorch+llmcompressor/ubi9-python-3.12/pylock.toml index 3a6f40f12f..41adb0f81a 100644 --- a/jupyter/pytorch+llmcompressor/ubi9-python-3.12/pylock.toml +++ b/jupyter/pytorch+llmcompressor/ubi9-python-3.12/pylock.toml @@ -2703,9 +2703,9 @@ wheels = [{ url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df83 [[packages]] name = "odh-elyra" -version = "4.3.0" -sdist = { url = "https://files.pythonhosted.org/packages/64/60/09bacf2a747e902ecdf72fbcdcd335c9232b455e37c57261e53c56ef472b/odh_elyra-4.3.0.tar.gz", upload-time = 2025-10-07T18:01:00Z, size = 2196737, hashes = { sha256 = "907ff7ec351677bebea262439deee8c3fedcc185fd3f3c4536adb150fb50eaae" } } -wheels = [{ url = "https://files.pythonhosted.org/packages/5b/19/c8db67de0fab5190e0b186f1321f02301d71e5a3cefbd337b0d67c6d676b/odh_elyra-4.3.0-py3-none-any.whl", upload-time = 2025-10-07T18:00:58Z, size = 4352807, hashes = { sha256 = "a5371618994024c2d48b55558342493f84ddf384c3c815c8c086b52aa4b57fda" } }] +version = "4.3.1" +sdist = { url = "https://files.pythonhosted.org/packages/f7/f4/6be53ca16125e7d3ca6cb86aa8f001ebe200977e429bedb4b6467e692328/odh_elyra-4.3.1.tar.gz", upload-time = 2025-10-09T14:53:22Z, size = 2196705, hashes = { sha256 = "522c85c647d97d3a5317389dc106452fdebf48f95ecbdea3db0aef1b43192475" } } +wheels = [{ url = "https://files.pythonhosted.org/packages/b3/7a/009a5eb3e872330c22155928ffd3dcb8f139385b7de2a7540cad714be9bd/odh_elyra-4.3.1-py3-none-any.whl", upload-time = 2025-10-09T14:53:20Z, size = 4352762, hashes = { sha256 = "ae1008e0329e14d45a4c31b5436b00e732dcaa201cae15adb7f502249bf9d7a8" } }] [[packages]] name = "odh-jupyter-trash-cleanup" diff --git a/jupyter/pytorch+llmcompressor/ubi9-python-3.12/pyproject.toml b/jupyter/pytorch+llmcompressor/ubi9-python-3.12/pyproject.toml index 33a29165f5..94268c30eb 100644 --- a/jupyter/pytorch+llmcompressor/ubi9-python-3.12/pyproject.toml +++ b/jupyter/pytorch+llmcompressor/ubi9-python-3.12/pyproject.toml @@ -43,7 +43,7 @@ dependencies = [ "mysql-connector-python~=9.4.0", # JupyterLab packages - 
"odh-elyra==4.3.0", + "odh-elyra==4.3.1", "odh-jupyter-trash-cleanup==0.1.1", "jupyterlab==4.4.7", diff --git a/jupyter/pytorch/ubi9-python-3.12/pylock.toml b/jupyter/pytorch/ubi9-python-3.12/pylock.toml index 790360773a..719fcfe99b 100644 --- a/jupyter/pytorch/ubi9-python-3.12/pylock.toml +++ b/jupyter/pytorch/ubi9-python-3.12/pylock.toml @@ -2594,9 +2594,9 @@ wheels = [{ url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df83 [[packages]] name = "odh-elyra" -version = "4.3.0" -sdist = { url = "https://files.pythonhosted.org/packages/64/60/09bacf2a747e902ecdf72fbcdcd335c9232b455e37c57261e53c56ef472b/odh_elyra-4.3.0.tar.gz", upload-time = 2025-10-07T18:01:00Z, size = 2196737, hashes = { sha256 = "907ff7ec351677bebea262439deee8c3fedcc185fd3f3c4536adb150fb50eaae" } } -wheels = [{ url = "https://files.pythonhosted.org/packages/5b/19/c8db67de0fab5190e0b186f1321f02301d71e5a3cefbd337b0d67c6d676b/odh_elyra-4.3.0-py3-none-any.whl", upload-time = 2025-10-07T18:00:58Z, size = 4352807, hashes = { sha256 = "a5371618994024c2d48b55558342493f84ddf384c3c815c8c086b52aa4b57fda" } }] +version = "4.3.1" +sdist = { url = "https://files.pythonhosted.org/packages/f7/f4/6be53ca16125e7d3ca6cb86aa8f001ebe200977e429bedb4b6467e692328/odh_elyra-4.3.1.tar.gz", upload-time = 2025-10-09T14:53:22Z, size = 2196705, hashes = { sha256 = "522c85c647d97d3a5317389dc106452fdebf48f95ecbdea3db0aef1b43192475" } } +wheels = [{ url = "https://files.pythonhosted.org/packages/b3/7a/009a5eb3e872330c22155928ffd3dcb8f139385b7de2a7540cad714be9bd/odh_elyra-4.3.1-py3-none-any.whl", upload-time = 2025-10-09T14:53:20Z, size = 4352762, hashes = { sha256 = "ae1008e0329e14d45a4c31b5436b00e732dcaa201cae15adb7f502249bf9d7a8" } }] [[packages]] name = "odh-jupyter-trash-cleanup" diff --git a/jupyter/pytorch/ubi9-python-3.12/pyproject.toml b/jupyter/pytorch/ubi9-python-3.12/pyproject.toml index f2a759f14a..d1fec92459 100644 --- a/jupyter/pytorch/ubi9-python-3.12/pyproject.toml +++ b/jupyter/pytorch/ubi9-python-3.12/pyproject.toml @@ -32,7 +32,7 @@ dependencies = [ "mysql-connector-python~=9.4.0", # JupyterLab packages - "odh-elyra==4.3.0", + "odh-elyra==4.3.1", "odh-jupyter-trash-cleanup==0.1.1", "jupyterlab==4.4.7", diff --git a/jupyter/rocm/pytorch/ubi9-python-3.12/pylock.toml b/jupyter/rocm/pytorch/ubi9-python-3.12/pylock.toml index 47efb465e0..9f3b42c06d 100644 --- a/jupyter/rocm/pytorch/ubi9-python-3.12/pylock.toml +++ b/jupyter/rocm/pytorch/ubi9-python-3.12/pylock.toml @@ -2456,9 +2456,9 @@ wheels = [{ url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df83 [[packages]] name = "odh-elyra" -version = "4.3.0" -sdist = { url = "https://files.pythonhosted.org/packages/64/60/09bacf2a747e902ecdf72fbcdcd335c9232b455e37c57261e53c56ef472b/odh_elyra-4.3.0.tar.gz", upload-time = 2025-10-07T18:01:00Z, size = 2196737, hashes = { sha256 = "907ff7ec351677bebea262439deee8c3fedcc185fd3f3c4536adb150fb50eaae" } } -wheels = [{ url = "https://files.pythonhosted.org/packages/5b/19/c8db67de0fab5190e0b186f1321f02301d71e5a3cefbd337b0d67c6d676b/odh_elyra-4.3.0-py3-none-any.whl", upload-time = 2025-10-07T18:00:58Z, size = 4352807, hashes = { sha256 = "a5371618994024c2d48b55558342493f84ddf384c3c815c8c086b52aa4b57fda" } }] +version = "4.3.1" +sdist = { url = "https://files.pythonhosted.org/packages/f7/f4/6be53ca16125e7d3ca6cb86aa8f001ebe200977e429bedb4b6467e692328/odh_elyra-4.3.1.tar.gz", upload-time = 2025-10-09T14:53:22Z, size = 2196705, hashes = { sha256 = "522c85c647d97d3a5317389dc106452fdebf48f95ecbdea3db0aef1b43192475" } } +wheels = [{ url = 
"https://files.pythonhosted.org/packages/b3/7a/009a5eb3e872330c22155928ffd3dcb8f139385b7de2a7540cad714be9bd/odh_elyra-4.3.1-py3-none-any.whl", upload-time = 2025-10-09T14:53:20Z, size = 4352762, hashes = { sha256 = "ae1008e0329e14d45a4c31b5436b00e732dcaa201cae15adb7f502249bf9d7a8" } }] [[packages]] name = "odh-jupyter-trash-cleanup" diff --git a/jupyter/rocm/pytorch/ubi9-python-3.12/pyproject.toml b/jupyter/rocm/pytorch/ubi9-python-3.12/pyproject.toml index c5f1e3ef18..3dbd029019 100644 --- a/jupyter/rocm/pytorch/ubi9-python-3.12/pyproject.toml +++ b/jupyter/rocm/pytorch/ubi9-python-3.12/pyproject.toml @@ -33,7 +33,7 @@ dependencies = [ "mysql-connector-python~=9.4.0", # JupyterLab packages - "odh-elyra==4.3.0", + "odh-elyra==4.3.1", "odh-jupyter-trash-cleanup==0.1.1", "jupyterlab==4.4.7", diff --git a/jupyter/rocm/tensorflow/ubi9-python-3.12/pylock.toml b/jupyter/rocm/tensorflow/ubi9-python-3.12/pylock.toml index 6ad02ac157..9c41a7e56c 100644 --- a/jupyter/rocm/tensorflow/ubi9-python-3.12/pylock.toml +++ b/jupyter/rocm/tensorflow/ubi9-python-3.12/pylock.toml @@ -2508,9 +2508,9 @@ wheels = [{ url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df83 [[packages]] name = "odh-elyra" -version = "4.3.0" -sdist = { url = "https://files.pythonhosted.org/packages/64/60/09bacf2a747e902ecdf72fbcdcd335c9232b455e37c57261e53c56ef472b/odh_elyra-4.3.0.tar.gz", upload-time = 2025-10-07T18:01:00Z, size = 2196737, hashes = { sha256 = "907ff7ec351677bebea262439deee8c3fedcc185fd3f3c4536adb150fb50eaae" } } -wheels = [{ url = "https://files.pythonhosted.org/packages/5b/19/c8db67de0fab5190e0b186f1321f02301d71e5a3cefbd337b0d67c6d676b/odh_elyra-4.3.0-py3-none-any.whl", upload-time = 2025-10-07T18:00:58Z, size = 4352807, hashes = { sha256 = "a5371618994024c2d48b55558342493f84ddf384c3c815c8c086b52aa4b57fda" } }] +version = "4.3.1" +sdist = { url = "https://files.pythonhosted.org/packages/f7/f4/6be53ca16125e7d3ca6cb86aa8f001ebe200977e429bedb4b6467e692328/odh_elyra-4.3.1.tar.gz", upload-time = 2025-10-09T14:53:22Z, size = 2196705, hashes = { sha256 = "522c85c647d97d3a5317389dc106452fdebf48f95ecbdea3db0aef1b43192475" } } +wheels = [{ url = "https://files.pythonhosted.org/packages/b3/7a/009a5eb3e872330c22155928ffd3dcb8f139385b7de2a7540cad714be9bd/odh_elyra-4.3.1-py3-none-any.whl", upload-time = 2025-10-09T14:53:20Z, size = 4352762, hashes = { sha256 = "ae1008e0329e14d45a4c31b5436b00e732dcaa201cae15adb7f502249bf9d7a8" } }] [[packages]] name = "odh-jupyter-trash-cleanup" diff --git a/jupyter/rocm/tensorflow/ubi9-python-3.12/pyproject.toml b/jupyter/rocm/tensorflow/ubi9-python-3.12/pyproject.toml index 6b792ec466..9f95749ff3 100644 --- a/jupyter/rocm/tensorflow/ubi9-python-3.12/pyproject.toml +++ b/jupyter/rocm/tensorflow/ubi9-python-3.12/pyproject.toml @@ -34,7 +34,7 @@ dependencies = [ "mysql-connector-python~=9.4.0", # JupyterLab packages - "odh-elyra==4.3.0", + "odh-elyra==4.3.1", "odh-jupyter-trash-cleanup==0.1.1", "jupyterlab==4.4.7", diff --git a/jupyter/tensorflow/ubi9-python-3.12/pylock.toml b/jupyter/tensorflow/ubi9-python-3.12/pylock.toml index d86d20098b..50b2df2a8a 100644 --- a/jupyter/tensorflow/ubi9-python-3.12/pylock.toml +++ b/jupyter/tensorflow/ubi9-python-3.12/pylock.toml @@ -2619,9 +2619,9 @@ wheels = [{ url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df83 [[packages]] name = "odh-elyra" -version = "4.3.0" -sdist = { url = "https://files.pythonhosted.org/packages/64/60/09bacf2a747e902ecdf72fbcdcd335c9232b455e37c57261e53c56ef472b/odh_elyra-4.3.0.tar.gz", upload-time = 
2025-10-07T18:01:00Z, size = 2196737, hashes = { sha256 = "907ff7ec351677bebea262439deee8c3fedcc185fd3f3c4536adb150fb50eaae" } } -wheels = [{ url = "https://files.pythonhosted.org/packages/5b/19/c8db67de0fab5190e0b186f1321f02301d71e5a3cefbd337b0d67c6d676b/odh_elyra-4.3.0-py3-none-any.whl", upload-time = 2025-10-07T18:00:58Z, size = 4352807, hashes = { sha256 = "a5371618994024c2d48b55558342493f84ddf384c3c815c8c086b52aa4b57fda" } }] +version = "4.3.1" +sdist = { url = "https://files.pythonhosted.org/packages/f7/f4/6be53ca16125e7d3ca6cb86aa8f001ebe200977e429bedb4b6467e692328/odh_elyra-4.3.1.tar.gz", upload-time = 2025-10-09T14:53:22Z, size = 2196705, hashes = { sha256 = "522c85c647d97d3a5317389dc106452fdebf48f95ecbdea3db0aef1b43192475" } } +wheels = [{ url = "https://files.pythonhosted.org/packages/b3/7a/009a5eb3e872330c22155928ffd3dcb8f139385b7de2a7540cad714be9bd/odh_elyra-4.3.1-py3-none-any.whl", upload-time = 2025-10-09T14:53:20Z, size = 4352762, hashes = { sha256 = "ae1008e0329e14d45a4c31b5436b00e732dcaa201cae15adb7f502249bf9d7a8" } }] [[packages]] name = "odh-jupyter-trash-cleanup" diff --git a/jupyter/tensorflow/ubi9-python-3.12/pyproject.toml b/jupyter/tensorflow/ubi9-python-3.12/pyproject.toml index 007dbaba18..9291801af4 100644 --- a/jupyter/tensorflow/ubi9-python-3.12/pyproject.toml +++ b/jupyter/tensorflow/ubi9-python-3.12/pyproject.toml @@ -32,7 +32,7 @@ dependencies = [ "mysql-connector-python~=9.4.0", # JupyterLab packages - "odh-elyra==4.3.0", + "odh-elyra==4.3.1", "odh-jupyter-trash-cleanup==0.1.1", "jupyterlab==4.4.7", diff --git a/jupyter/trustyai/ubi9-python-3.12/pylock.toml b/jupyter/trustyai/ubi9-python-3.12/pylock.toml index 898a13830f..ee34bbcd8c 100644 --- a/jupyter/trustyai/ubi9-python-3.12/pylock.toml +++ b/jupyter/trustyai/ubi9-python-3.12/pylock.toml @@ -2319,9 +2319,9 @@ wheels = [{ url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df83 [[packages]] name = "odh-elyra" -version = "4.3.0" -sdist = { url = "https://files.pythonhosted.org/packages/64/60/09bacf2a747e902ecdf72fbcdcd335c9232b455e37c57261e53c56ef472b/odh_elyra-4.3.0.tar.gz", upload-time = 2025-10-07T18:01:00Z, size = 2196737, hashes = { sha256 = "907ff7ec351677bebea262439deee8c3fedcc185fd3f3c4536adb150fb50eaae" } } -wheels = [{ url = "https://files.pythonhosted.org/packages/5b/19/c8db67de0fab5190e0b186f1321f02301d71e5a3cefbd337b0d67c6d676b/odh_elyra-4.3.0-py3-none-any.whl", upload-time = 2025-10-07T18:00:58Z, size = 4352807, hashes = { sha256 = "a5371618994024c2d48b55558342493f84ddf384c3c815c8c086b52aa4b57fda" } }] +version = "4.3.1" +sdist = { url = "https://files.pythonhosted.org/packages/f7/f4/6be53ca16125e7d3ca6cb86aa8f001ebe200977e429bedb4b6467e692328/odh_elyra-4.3.1.tar.gz", upload-time = 2025-10-09T14:53:22Z, size = 2196705, hashes = { sha256 = "522c85c647d97d3a5317389dc106452fdebf48f95ecbdea3db0aef1b43192475" } } +wheels = [{ url = "https://files.pythonhosted.org/packages/b3/7a/009a5eb3e872330c22155928ffd3dcb8f139385b7de2a7540cad714be9bd/odh_elyra-4.3.1-py3-none-any.whl", upload-time = 2025-10-09T14:53:20Z, size = 4352762, hashes = { sha256 = "ae1008e0329e14d45a4c31b5436b00e732dcaa201cae15adb7f502249bf9d7a8" } }] [[packages]] name = "odh-jupyter-trash-cleanup" diff --git a/jupyter/trustyai/ubi9-python-3.12/pyproject.toml b/jupyter/trustyai/ubi9-python-3.12/pyproject.toml index 27a65cb76c..a5b361930d 100644 --- a/jupyter/trustyai/ubi9-python-3.12/pyproject.toml +++ b/jupyter/trustyai/ubi9-python-3.12/pyproject.toml @@ -37,7 +37,7 @@ dependencies = [ "mysql-connector-python~=9.4.0", # 
JupyterLab packages - "odh-elyra==4.3.0", + "odh-elyra==4.3.1", "odh-jupyter-trash-cleanup==0.1.1", "jupyterlab==4.4.7", From 895639c13231bfeb31b2f280e299c5efb186a69e Mon Sep 17 00:00:00 2001 From: William Siqueira Date: Thu, 9 Oct 2025 15:53:58 -0300 Subject: [PATCH 5/6] Restoring Dockerfile because we are copying the bootstrapper from local installation --- runtimes/minimal/ubi9-python-3.12/Dockerfile.cpu | 3 --- 1 file changed, 3 deletions(-) diff --git a/runtimes/minimal/ubi9-python-3.12/Dockerfile.cpu b/runtimes/minimal/ubi9-python-3.12/Dockerfile.cpu index 60767c34a8..18f56a95e2 100644 --- a/runtimes/minimal/ubi9-python-3.12/Dockerfile.cpu +++ b/runtimes/minimal/ubi9-python-3.12/Dockerfile.cpu @@ -73,9 +73,6 @@ WORKDIR /opt/app-root/bin COPY ${MINIMAL_SOURCE_CODE}/pylock.toml ./ # Copy Elyra dependencies for air-gapped enviroment COPY ${MINIMAL_SOURCE_CODE}/utils ./utils/ -# Download Elyra boostrapper.py -RUN curl -fL https://raw.githubusercontent.com/opendatahub-io/elyra/refs/tags/v4.3.0/elyra/kfp/bootstrapper.py \ - -o ./utils/bootstrapper.py RUN echo "Installing softwares and packages" && \ # This may have to download and compile some dependencies, and as we don't lock requirements from `build-system.requires`, From 6b2415d200a3c4d0fcacf27188835851f1f57cf6 Mon Sep 17 00:00:00 2001 From: William Siqueira Date: Fri, 10 Oct 2025 09:39:15 -0300 Subject: [PATCH 6/6] Updating Manifests --- manifests/base/jupyter-datascience-notebook-imagestream.yaml | 4 ++-- manifests/base/jupyter-pytorch-llmcompressor-imagestream.yaml | 2 +- manifests/base/jupyter-pytorch-notebook-imagestream.yaml | 4 ++-- manifests/base/jupyter-rocm-pytorch-notebook-imagestream.yaml | 4 ++-- .../base/jupyter-rocm-tensorflow-notebook-imagestream.yaml | 4 ++-- manifests/base/jupyter-tensorflow-notebook-imagestream.yaml | 4 ++-- manifests/base/jupyter-trustyai-notebook-imagestream.yaml | 4 ++-- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/manifests/base/jupyter-datascience-notebook-imagestream.yaml b/manifests/base/jupyter-datascience-notebook-imagestream.yaml index 3d6cf1c3c8..ff1ba8c85a 100644 --- a/manifests/base/jupyter-datascience-notebook-imagestream.yaml +++ b/manifests/base/jupyter-datascience-notebook-imagestream.yaml @@ -33,7 +33,7 @@ spec: {"name": "Pandas", "version": "2.3"}, {"name": "Scikit-learn", "version": "1.7"}, {"name": "Scipy", "version": "1.16"}, - {"name": "Odh-Elyra", "version": "4.2"}, + {"name": "Odh-Elyra", "version": "4.3"}, {"name": "PyMongo", "version": "4.14"}, {"name": "Pyodbc", "version": "5.2"}, {"name": "Codeflare-SDK", "version": "0.31"}, @@ -71,7 +71,7 @@ spec: {"name": "Pandas", "version": "2.2"}, {"name": "Scikit-learn", "version": "1.6"}, {"name": "Scipy", "version": "1.15"}, - {"name": "Odh-Elyra", "version": "4.2"}, + {"name": "Odh-Elyra", "version": "4.3"}, {"name": "PyMongo", "version": "4.11"}, {"name": "Pyodbc", "version": "5.2"}, {"name": "Codeflare-SDK", "version": "0.30"}, diff --git a/manifests/base/jupyter-pytorch-llmcompressor-imagestream.yaml b/manifests/base/jupyter-pytorch-llmcompressor-imagestream.yaml index 71316bb74b..76eab4cc95 100644 --- a/manifests/base/jupyter-pytorch-llmcompressor-imagestream.yaml +++ b/manifests/base/jupyter-pytorch-llmcompressor-imagestream.yaml @@ -39,7 +39,7 @@ spec: {"name": "Pandas", "version": "2.3"}, {"name": "Scikit-learn", "version": "1.7"}, {"name": "Scipy", "version": "1.16"}, - {"name": "Odh-Elyra", "version": "4.2"}, + {"name": "Odh-Elyra", "version": "4.3"}, {"name": "PyMongo", "version": "4.14"}, {"name": 
"Pyodbc", "version": "5.2"}, {"name": "Codeflare-SDK", "version": "0.31"}, diff --git a/manifests/base/jupyter-pytorch-notebook-imagestream.yaml b/manifests/base/jupyter-pytorch-notebook-imagestream.yaml index 89df4e0e2a..99e0935095 100644 --- a/manifests/base/jupyter-pytorch-notebook-imagestream.yaml +++ b/manifests/base/jupyter-pytorch-notebook-imagestream.yaml @@ -38,7 +38,7 @@ spec: {"name": "Pandas", "version": "2.3"}, {"name": "Scikit-learn", "version": "1.7"}, {"name": "Scipy", "version": "1.16"}, - {"name": "Odh-Elyra", "version": "4.2"}, + {"name": "Odh-Elyra", "version": "4.3"}, {"name": "PyMongo", "version": "4.14"}, {"name": "Pyodbc", "version": "5.2"}, {"name": "Codeflare-SDK", "version": "0.31"}, @@ -80,7 +80,7 @@ spec: {"name": "Pandas", "version": "2.2"}, {"name": "Scikit-learn", "version": "1.6"}, {"name": "Scipy", "version": "1.15"}, - {"name": "Odh-Elyra", "version": "4.2"}, + {"name": "Odh-Elyra", "version": "4.3"}, {"name": "PyMongo", "version": "4.11"}, {"name": "Pyodbc", "version": "5.2"}, {"name": "Codeflare-SDK", "version": "0.30"}, diff --git a/manifests/base/jupyter-rocm-pytorch-notebook-imagestream.yaml b/manifests/base/jupyter-rocm-pytorch-notebook-imagestream.yaml index cd7f0d4869..dd1245df58 100644 --- a/manifests/base/jupyter-rocm-pytorch-notebook-imagestream.yaml +++ b/manifests/base/jupyter-rocm-pytorch-notebook-imagestream.yaml @@ -36,7 +36,7 @@ spec: {"name": "Pandas", "version": "2.3"}, {"name": "Scikit-learn", "version": "1.7"}, {"name": "Scipy", "version": "1.16"}, - {"name": "Odh-Elyra", "version": "4.2"}, + {"name": "Odh-Elyra", "version": "4.3"}, {"name": "PyMongo", "version": "4.14"}, {"name": "Pyodbc", "version": "5.2"}, {"name": "Codeflare-SDK", "version": "0.31"}, @@ -76,7 +76,7 @@ spec: {"name": "Pandas", "version": "2.2"}, {"name": "Scikit-learn", "version": "1.6"}, {"name": "Scipy", "version": "1.15"}, - {"name": "Odh-Elyra", "version": "4.2"}, + {"name": "Odh-Elyra", "version": "4.3"}, {"name": "PyMongo", "version": "4.11"}, {"name": "Pyodbc", "version": "5.2"}, {"name": "Codeflare-SDK", "version": "0.30"}, diff --git a/manifests/base/jupyter-rocm-tensorflow-notebook-imagestream.yaml b/manifests/base/jupyter-rocm-tensorflow-notebook-imagestream.yaml index 3a9f3d3e13..05a966a15d 100644 --- a/manifests/base/jupyter-rocm-tensorflow-notebook-imagestream.yaml +++ b/manifests/base/jupyter-rocm-tensorflow-notebook-imagestream.yaml @@ -36,7 +36,7 @@ spec: {"name": "Pandas", "version": "2.3"}, {"name": "Scikit-learn", "version": "1.7"}, {"name": "Scipy", "version": "1.16"}, - {"name": "Odh-Elyra", "version": "4.2"}, + {"name": "Odh-Elyra", "version": "4.3"}, {"name": "PyMongo", "version": "4.14"}, {"name": "Pyodbc", "version": "5.2"}, {"name": "Codeflare-SDK", "version": "0.31"}, @@ -75,7 +75,7 @@ spec: {"name": "Pandas", "version": "2.2"}, {"name": "Scikit-learn", "version": "1.6"}, {"name": "Scipy", "version": "1.15"}, - {"name": "Odh-Elyra", "version": "4.2"}, + {"name": "Odh-Elyra", "version": "4.3"}, {"name": "PyMongo", "version": "4.11"}, {"name": "Pyodbc", "version": "5.2"}, {"name": "Codeflare-SDK", "version": "0.30"}, diff --git a/manifests/base/jupyter-tensorflow-notebook-imagestream.yaml b/manifests/base/jupyter-tensorflow-notebook-imagestream.yaml index dd6da54ce9..1e5150f17f 100644 --- a/manifests/base/jupyter-tensorflow-notebook-imagestream.yaml +++ b/manifests/base/jupyter-tensorflow-notebook-imagestream.yaml @@ -39,7 +39,7 @@ spec: {"name": "Pandas", "version": "2.3"}, {"name": "Scikit-learn", "version": "1.7"}, {"name": "Scipy", 
"version": "1.16"}, - {"name": "Odh-Elyra", "version": "4.2"}, + {"name": "Odh-Elyra", "version": "4.3"}, {"name": "PyMongo", "version": "4.14"}, {"name": "Pyodbc", "version": "5.2"}, {"name": "Codeflare-SDK", "version": "0.31"}, @@ -81,7 +81,7 @@ spec: {"name": "Pandas", "version": "2.2"}, {"name": "Scikit-learn", "version": "1.6"}, {"name": "Scipy", "version": "1.15"}, - {"name": "Odh-Elyra", "version": "4.2"}, + {"name": "Odh-Elyra", "version": "4.3"}, {"name": "PyMongo", "version": "4.11"}, {"name": "Pyodbc", "version": "5.2"}, {"name": "Codeflare-SDK", "version": "0.30"}, diff --git a/manifests/base/jupyter-trustyai-notebook-imagestream.yaml b/manifests/base/jupyter-trustyai-notebook-imagestream.yaml index c85a4f0e73..46659780c3 100644 --- a/manifests/base/jupyter-trustyai-notebook-imagestream.yaml +++ b/manifests/base/jupyter-trustyai-notebook-imagestream.yaml @@ -38,7 +38,7 @@ spec: {"name": "Pandas", "version": "1.5"}, {"name": "Scikit-learn", "version": "1.7"}, {"name": "Scipy", "version": "1.16"}, - {"name": "Odh-Elyra", "version": "4.2"}, + {"name": "Odh-Elyra", "version": "4.3"}, {"name": "PyMongo", "version": "4.14"}, {"name": "Pyodbc", "version": "5.2"}, {"name": "Codeflare-SDK", "version": "0.31"}, @@ -80,7 +80,7 @@ spec: {"name": "Pandas", "version": "1.5"}, {"name": "Scikit-learn", "version": "1.5"}, {"name": "Scipy", "version": "1.15"}, - {"name": "Odh-Elyra", "version": "4.2"}, + {"name": "Odh-Elyra", "version": "4.3"}, {"name": "PyMongo", "version": "4.11"}, {"name": "Pyodbc", "version": "5.2"}, {"name": "Codeflare-SDK", "version": "0.30"},