@@ -18,7 +18,6 @@
 
 from enum import Enum
 from typing import Dict, List, Set, Union, Optional, Any, TYPE_CHECKING
-from urllib.parse import urlparse
 
 import attr
 
@@ -465,6 +464,7 @@ def __init__(
         self.step_args = step_args
         self.estimator = estimator
         self.inputs = inputs
+        self.job_name = None
 
         self._properties = Properties(
             step_name=name, step=self, shape_name="DescribeTrainingJobResponse"
@@ -493,19 +493,6 @@ def __init__(
             DeprecationWarning,
         )
 
-        self.job_name = None
-        if estimator and (estimator.source_dir or estimator.entry_point):
-            # By default, `Estimator` will upload the local code to an S3 path
-            # containing a timestamp. This causes cache misses whenever a
-            # pipeline is updated, even if the underlying script hasn't changed.
-            # To avoid this, hash the contents of the training script and include it
-            # in the `job_name` passed to the `Estimator`, which will be used
-            # instead of the timestamped path.
-            if not is_pipeline_variable(estimator.source_dir) and not is_pipeline_variable(
-                estimator.entry_point
-            ):
-                self.job_name = self._generate_code_upload_path()
-
     @property
     def arguments(self) -> RequestType:
         """The arguments dictionary that is used to call `create_training_job`.
@@ -554,26 +541,6 @@ def to_request(self) -> RequestType:
 
         return request_dict
 
-    def _generate_code_upload_path(self) -> str or None:
-        """Generate an upload path for local training scripts based on their content."""
-        from sagemaker.workflow.utilities import hash_files_or_dirs
-
-        if self.estimator.source_dir:
-            source_dir_url = urlparse(self.estimator.source_dir)
-            if source_dir_url.scheme == "" or source_dir_url.scheme == "file":
-                code_hash = hash_files_or_dirs(
-                    [self.estimator.source_dir] + self.estimator.dependencies
-                )
-                return f"{self.name}-{code_hash}"[:1024]
-        elif self.estimator.entry_point:
-            entry_point_url = urlparse(self.estimator.entry_point)
-            if entry_point_url.scheme == "" or entry_point_url.scheme == "file":
-                code_hash = hash_files_or_dirs(
-                    [self.estimator.entry_point] + self.estimator.dependencies
-                )
-                return f"{self.name}-{code_hash}"[:1024]
-        return None
-
 
 class CreateModelStep(ConfigurableRetryStep):
     """`CreateModelStep` for SageMaker Pipelines Workflows."""
@@ -895,16 +862,6 @@ def __init__(
                     "code argument has to be a valid S3 URI or local file path "
                     + "rather than a pipeline variable"
                 )
-            code_url = urlparse(code)
-            if code_url.scheme == "" or code_url.scheme == "file":
-                # By default, `Processor` will upload the local code to an S3 path
-                # containing a timestamp. This causes cache misses whenever a
-                # pipeline is updated, even if the underlying script hasn't changed.
-                # To avoid this, hash the contents of the script and include it
-                # in the `job_name` passed to the `Processor`, which will be used
-                # instead of the timestamped path.
-                self.job_name = self._generate_code_upload_path()
-
         warnings.warn(
            (
                'We are deprecating the instantiation of ProcessingStep using "processor".'
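
For context on the removed logic: by default, `Estimator` and `Processor` upload local code to an S3 prefix containing a timestamp, so every pipeline update produces a new upload path and a step-cache miss even when the script is unchanged. The deleted `_generate_code_upload_path` avoided this by deriving the job name from a hash of the code contents. A minimal sketch of that idea, assuming the listed files exist locally and using `hash_local_code` as a hypothetical stand-in for `sagemaker.workflow.utilities.hash_files_or_dirs`:

import hashlib
import os

def hash_local_code(paths):
    """Hypothetical stand-in for sagemaker.workflow.utilities.hash_files_or_dirs:
    digest file contents (recursing into directories) in a deterministic order.
    """
    md5 = hashlib.md5()
    for path in sorted(paths):
        if os.path.isdir(path):
            for root, _, files in sorted(os.walk(path)):
                for name in sorted(files):
                    with open(os.path.join(root, name), "rb") as f:
                        md5.update(f.read())
        else:
            with open(path, "rb") as f:
                md5.update(f.read())
    return md5.hexdigest()

# Same contents -> same digest -> same upload path across pipeline updates,
# unlike the default timestamped S3 prefix.
job_name = f"MyTrainingStep-{hash_local_code(['train.py'])}"[:1024]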
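Step caching itself is opt-in, so the stability of the upload path only matters when a step enables it. A sketch of enabling caching on a `TrainingStep`, where `estimator` and `inputs` are assumed to be defined elsewhere and `expire_after` takes an ISO 8601 duration (note this diff also deprecates `estimator`-based instantiation in favor of `step_args`):

from sagemaker.workflow.steps import CacheConfig, TrainingStep

# When caching is enabled, SageMaker Pipelines reuses a previous step
# execution if the step's arguments (including the code upload S3 path)
# are unchanged, which is why a timestamped path defeats the cache.
cache_config = CacheConfig(enable_caching=True, expire_after="PT1H")

step_train = TrainingStep(
    name="MyTrainingStep",
    estimator=estimator,  # assumed defined elsewhere
    inputs=inputs,        # assumed defined elsewhere
    cache_config=cache_config,
)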