@@ -28,71 +28,78 @@ def __init__(self, data_validation_artifact: DataValidationArtifact,
2828 try :
2929 self .data_validation_artifact = data_validation_artifact
3030 self .data_transformation_config = data_transformation_config
31+
3132 except CustomException as e :
3233 raise CustomException (e )
33-
34+
@staticmethod
def read_file(file_path: Path) -> pd.DataFrame:
    """Load a CSV file into a DataFrame.

    Args:
        file_path: Location of the CSV file to read.

    Returns:
        The parsed file contents as a ``pd.DataFrame``.

    Raises:
        CustomException: If ``pd.read_csv`` fails for any reason; the
            original error is attached as the cause.
    """
    try:
        return pd.read_csv(file_path)
    # pd.read_csv raises built-in / pandas errors, never CustomException, so
    # the handler must catch Exception or it can never fire.
    except Exception as e:
        raise CustomException(e) from e
40-
def get_data_transformer_object(self) -> Pipeline:
    """Build the preprocessing pipeline used on the input features.

    A ``KNNImputer`` is created from ``DATA_TRANSFORMATION_IMPUTER_PARAMS``
    and installed as the single step, named ``"imputer"``, of a ``Pipeline``.
    The pipeline is returned without being fitted.

    Returns:
        A ``Pipeline`` whose only step is the configured ``KNNImputer``.

    Raises:
        CustomException: If constructing the imputer or the pipeline fails;
            the original error is attached as the cause.
    """
    logging.info("Entered get_data_transformer_object method of Transformation class")

    try:
        imputer: KNNImputer = KNNImputer(**DATA_TRANSFORMATION_IMPUTER_PARAMS)
        logging.info(f"Initialise KNNImputer with {DATA_TRANSFORMATION_IMPUTER_PARAMS}")
        processor: Pipeline = Pipeline([("imputer", imputer)])
        return processor
    # Construction errors (e.g. invalid imputer parameters) are ordinary
    # exceptions, so catch Exception and re-raise as the project error type.
    except Exception as e:
        raise CustomException(e) from e
61-
64+
def initiate_data_transformation(self) -> DataTransformationArtifact:
    """Run the data-transformation step and persist its outputs.

    Reads the validated train and test CSV files, separates the
    ``TARGET_COLUMN`` from the input features, replaces ``-1`` target
    values with ``0``, fits the preprocessing pipeline on the training
    features only, transforms both feature sets, and appends the target as
    the last column of each resulting array. The fitted preprocessor and
    both arrays are saved to the paths held in
    ``self.data_transformation_config``.

    Returns:
        A ``DataTransformationArtifact`` carrying the preprocessor, train
        and test output paths.

    Raises:
        CustomException: If any step fails; the original error is attached
            as the cause.
    """
    logging.info('Entered initiate_data_transformation method of DataTransformation class')
    try:
        logging.info('Started Data Transformation')
        train_df = DataTransformation.read_file(self.data_validation_artifact.valid_train_file_path)
        test_df = DataTransformation.read_file(self.data_validation_artifact.valid_test_file_path)

        def split_features_and_target(frame):
            # Same split for train and test: everything except the target
            # column is a feature; -1 labels are mapped to 0.
            features = frame.drop(columns=[TARGET_COLUMN])
            target = frame[TARGET_COLUMN].replace(-1, 0)
            return features, target

        input_feature_train_df, target_feature_train_df = split_features_and_target(train_df)
        input_feature_test_df, target_feature_test_df = split_features_and_target(test_df)

        preprocessor = self.get_data_transformer_object()

        # Fit on the training features only; the test features are merely
        # transformed with the already-fitted preprocessor.
        preprocessor_obj = preprocessor.fit(input_feature_train_df)
        transformed_input_train_feature = preprocessor_obj.transform(input_feature_train_df)
        transformed_input_test_feature = preprocessor_obj.transform(input_feature_test_df)

        # np.c_ stacks column-wise, so the target becomes the last column.
        train_arr = np.c_[transformed_input_train_feature, np.array(target_feature_train_df)]
        test_arr = np.c_[transformed_input_test_feature, np.array(target_feature_test_df)]

        config = self.data_transformation_config
        save_object(config.transformed_object_file_path, preprocessor_obj)
        save_numpy_array_data(config.transformed_train_file_path, array=train_arr)
        save_numpy_array_data(config.transformed_test_file_path, array=test_arr)

        data_transformation_artifact = DataTransformationArtifact(
            transformed_object_file_path=config.transformed_object_file_path,
            transformed_train_file_path=config.transformed_train_file_path,
            transformed_test_file_path=config.transformed_test_file_path,
        )

        return data_transformation_artifact

    except Exception as e:
        raise CustomException(e) from e
98105
0 commit comments