2222import tempfile
2323from abc import ABC , abstractmethod
2424from sagemaker import image_uris , s3 , utils
25+ from sagemaker .deprecations import deprecation_warning
2526from sagemaker .processing import ProcessingInput , ProcessingOutput , Processor
2627
2728logger = logging .getLogger (__name__ )
2829
2930
31+ @deprecation_warning (
32+ msg = "s3_data_distribution_type parameter will no longer be supported. Everything else will"
33+ " remain as is" ,
34+ date = "15 Mar 2022" ,
35+ )
3036class DataConfig :
3137 """Config object related to configurations of the input and output dataset."""
3238
@@ -58,8 +64,8 @@ def __init__(
5864 dataset format is JSONLines.
5965 dataset_type (str): Format of the dataset. Valid values are "text/csv" for CSV,
6066 "application/jsonlines" for JSONLines, and "application/x-parquet" for Parquet.
61- s3_data_distribution_type (str): Valid options are "FullyReplicated" or
62- "ShardedByS3Key" .
67+ s3_data_distribution_type (str): Deprecated. The only valid option is "FullyReplicated".
68+ Any other value is ignored (a warning is logged) and "FullyReplicated" is used.
6369 s3_compression_type (str): Valid options are "None" or "Gzip".
6470 joinsource (str): The name or index of the column in the dataset that acts as an
6571 identifier column (for instance, while performing a join). This column is only
@@ -80,7 +86,13 @@ def __init__(
8086 self .s3_data_input_path = s3_data_input_path
8187 self .s3_output_path = s3_output_path
8288 self .s3_analysis_config_output_path = s3_analysis_config_output_path
83- self .s3_data_distribution_type = s3_data_distribution_type
89+ if s3_data_distribution_type != "FullyReplicated" :
90+ logger .warning (
91+ "s3_data_distribution_type parameter, set to %s, is being ignored. Only"
92+ " valid option is FullyReplicated" ,
93+ s3_data_distribution_type ,
94+ )
95+ self .s3_data_distribution_type = "FullyReplicated"
8496 self .s3_compression_type = s3_compression_type
8597 self .label = label
8698 self .headers = headers
0 commit comments