27
27
import sagemaker
28
28
from sagemaker import git_utils
29
29
from sagemaker .analytics import TrainingJobAnalytics
30
- from sagemaker .debugger import (
31
- DebuggerHookConfig ,
32
- TensorBoardOutputConfig ,
33
- get_rule_container_image_uri ,
34
- )
30
+ from sagemaker .debugger import DebuggerHookConfig
31
+ from sagemaker .debugger import TensorBoardOutputConfig # noqa: F401 # pylint: disable=unused-import
32
+ from sagemaker .debugger import get_rule_container_image_uri
35
33
from sagemaker .s3 import S3Uploader
36
34
37
35
from sagemaker .fw_utils import (
@@ -331,6 +329,9 @@ def _prepare_for_training(self, job_name=None):
331
329
# Prepare rules and debugger configs for training.
332
330
if self .rules and not self .debugger_hook_config :
333
331
self .debugger_hook_config = DebuggerHookConfig (s3_output_path = self .output_path )
332
+ # If a DebuggerHookConfig was provided without an S3 output path, default it to the Estimator's output path.
333
+ if self .debugger_hook_config and not self .debugger_hook_config .s3_output_path :
334
+ self .debugger_hook_config .s3_output_path = self .output_path
334
335
self ._prepare_rules ()
335
336
self ._prepare_collection_configs ()
336
337
@@ -340,17 +341,13 @@ def _prepare_rules(self):
340
341
if self .rules is not None :
341
342
# Iterate through each of the provided rules.
342
343
for rule in self .rules :
343
- # Set the instance type and volume size using the Estimator's defaults.
344
344
# Set the image URI using the default rule evaluator image and the region.
345
345
if rule .image_uri == "DEFAULT_RULE_EVALUATOR_IMAGE" :
346
346
rule .image_uri = get_rule_container_image_uri (
347
347
self .sagemaker_session .boto_region_name
348
348
)
349
349
rule .instance_type = None
350
350
rule .volume_size_in_gb = None
351
- else :
352
- rule .instance_type = self .train_instance_type
353
- rule .volume_size_in_gb = self .train_volume_size
354
351
# If source was provided as a rule parameter, upload to S3 and save the S3 uri.
355
352
if "source_s3_uri" in (rule .rule_parameters or {}):
356
353
parse_result = urlparse (rule .rule_parameters ["source_s3_uri" ])
@@ -384,6 +381,42 @@ def _prepare_collection_configs(self):
384
381
if self .debugger_hook_config is not None :
385
382
self .collection_configs .update (self .debugger_hook_config .collection_configs or [])
386
383
384
def get_debugger_artifacts_path(self):
    """Gets the path to the DebuggerHookConfig output artifacts.

    The path is formed by joining the hook config's ``s3_output_path``, the
    name of the latest training job, and the fixed ``debug-output`` suffix.
    ``_ensure_latest_training_job`` is called first with an explanatory
    error message (presumably it raises when the Estimator has no training
    job -- confirm against its definition).

    Returns:
        str: An S3 path to the output artifacts, or None if the Estimator
            has no ``debugger_hook_config``.
    """
    # S3 URIs always use forward slashes. os.path.join would insert
    # backslashes on Windows, so join with posixpath on every platform.
    import posixpath

    self._ensure_latest_training_job(
        error_message="""Cannot get the Debugger artifacts path.
    The Estimator is not associated with a training job."""
    )
    if self.debugger_hook_config is not None:
        return posixpath.join(
            self.debugger_hook_config.s3_output_path,
            self.latest_training_job.name,
            "debug-output",
        )
    return None
401
+
402
def get_tensorboard_artifacts_path(self):
    """Gets the path to the TensorBoardOutputConfig output artifacts.

    The path is formed by joining the TensorBoard config's
    ``s3_output_path``, the name of the latest training job, and the fixed
    ``tensorboard-output`` suffix. ``_ensure_latest_training_job`` is called
    first with an explanatory error message (presumably it raises when the
    Estimator has no training job -- confirm against its definition).

    Returns:
        str: An S3 path to the output artifacts, or None if the Estimator
            has no ``tensorboard_output_config``.
    """
    # S3 URIs always use forward slashes. os.path.join would insert
    # backslashes on Windows, so join with posixpath on every platform.
    import posixpath

    self._ensure_latest_training_job(
        error_message="""Cannot get the TensorBoard artifacts path.
    The Estimator is not associated with a training job."""
    )
    # Guard on the config that is actually dereferenced below. Checking
    # debugger_hook_config here would raise AttributeError whenever a
    # debugger config exists but no TensorBoard config was supplied.
    if self.tensorboard_output_config is not None:
        return posixpath.join(
            self.tensorboard_output_config.s3_output_path,
            self.latest_training_job.name,
            "tensorboard-output",
        )
    return None
419
+
387
420
def fit (self , inputs = None , wait = True , logs = "All" , job_name = None , experiment_config = None ):
388
421
"""Train a model using the input training dataset.
389
422
@@ -1626,10 +1659,6 @@ def _prepare_for_training(self, job_name=None):
1626
1659
# Set defaults for debugging.
1627
1660
if self .debugger_hook_config is None :
1628
1661
self .debugger_hook_config = DebuggerHookConfig (s3_output_path = self .output_path )
1629
- if self .tensorboard_output_config is None :
1630
- self .tensorboard_output_config = TensorBoardOutputConfig (
1631
- s3_output_path = self .output_path
1632
- )
1633
1662
1634
1663
def _stage_user_code_in_s3 (self ):
1635
1664
"""Upload the user training script to s3 and return the location.
0 commit comments