2222import tempfile
2323from abc import ABC , abstractmethod
2424from sagemaker import image_uris , s3 , utils
25- from sagemaker .deprecations import deprecation_warning
2625from sagemaker .processing import ProcessingInput , ProcessingOutput , Processor
2726
2827logger = logging .getLogger (__name__ )
2928
3029
31- @deprecation_warning (
32- msg = "s3_data_distribution_type parameter will no longer be supported. Everything else will"
33- " remain as is" ,
34- date = "15 Mar 2022" ,
35- )
3630class DataConfig :
3731 """Config object related to configurations of the input and output dataset."""
3832
@@ -45,7 +39,6 @@ def __init__(
4539 headers = None ,
4640 features = None ,
4741 dataset_type = "text/csv" ,
48- s3_data_distribution_type = "FullyReplicated" ,
4942 s3_compression_type = "None" ,
5043 joinsource = None ,
5144 ):
@@ -64,8 +57,6 @@ def __init__(
6457 dataset format is JSONLines.
6558 dataset_type (str): Format of the dataset. Valid values are "text/csv" for CSV,
6659 "application/jsonlines" for JSONLines, and "application/x-parquet" for Parquet.
67- s3_data_distribution_type (str): Deprecated. Only valid option is "FullyReplicated".
68- Any other value is ignored.
6960 s3_compression_type (str): Valid options are "None" or "Gzip".
7061 joinsource (str): The name or index of the column in the dataset that acts as an
7162 identifier column (for instance, while performing a join). This column is only
@@ -86,12 +77,6 @@ def __init__(
8677 self .s3_data_input_path = s3_data_input_path
8778 self .s3_output_path = s3_output_path
8879 self .s3_analysis_config_output_path = s3_analysis_config_output_path
89- if s3_data_distribution_type != "FullyReplicated" :
90- logger .warning (
91- "s3_data_distribution_type parameter, set to %s, is being ignored. Only"
92- " valid option is FullyReplicated" ,
93- s3_data_distribution_type ,
94- )
9580 self .s3_data_distribution_type = "FullyReplicated"
9681 self .s3_compression_type = s3_compression_type
9782 self .label = label
0 commit comments