Commit be2b012

refactor: Fix linting issues and code quality improvements
1 parent 7548cf4 commit be2b012

7 files changed: +110 -93 lines changed


NetworkSecurity/components/data_transformation.py

Lines changed: 44 additions & 37 deletions
@@ -28,71 +28,78 @@ def __init__(self, data_validation_artifact: DataValidationArtifact,
         try:
             self.data_validation_artifact = data_validation_artifact
             self.data_transformation_config = data_transformation_config
+
         except CustomException as e:
             raise CustomException(e)
-
+
     @staticmethod
     def read_file(file_path: Path) -> pd.DataFrame:
         try:
             return pd.read_csv(file_path)
-        except Exception as e:
+
+        except CustomException as e:
             raise CustomException(e)
-
-    def get_data_transformer_object(cls) -> Pipeline:
+
+    def get_data_transformer_object(self) -> Pipeline:
         """
         It initialises a KNNImputer object with the parameters specified in the training_pipeline.py file
         and returns a Pipeline object with the KNNImputer object as the first step.
 
         Args:
-            cls: DataTransformation
+            self: DataTransformation
 
         Returns:
             A Pipeline object
         """
         logging.info("Entered get_data_transformer_object method of Transformation class")
-
+
         try:
-            imputer:KNNImputer = KNNImputer(**DATA_TRANSFORMATION_IMPUTER_PARAMS)
-            logging.info(f"Initialise KNNImputer with {DATA_TRANSFORMATION_IMPUTER_PARAMS}")
-            processor:Pipeline=Pipeline([("imputer",imputer)])
-            return processor
-        except CustomException as e:
+            imputer: KNNImputer = KNNImputer(**DATA_TRANSFORMATION_IMPUTER_PARAMS)
+            logging.info(f"Initialise KNNImputer with {DATA_TRANSFORMATION_IMPUTER_PARAMS}")
+            processor: Pipeline = Pipeline([("imputer", imputer)])
+            return processor
+
+        except Exception as e:
             raise CustomException(e)
-
+
     def initiate_data_transformation(self) -> DataTransformationArtifact:
         logging.info('Entered initiate_data_transformation method of DataTransformation class')
         try:
-            logging.info('Started Data Transformation')
-            train_df = DataTransformation.read_file(self.data_validation_artifact.valid_train_file_path)
-            test_df = DataTransformation.read_file(self.data_validation_artifact.valid_test_file_path)
+            logging.info('Started Data Transformation')
+            train_df = DataTransformation.read_file(self.data_validation_artifact.valid_train_file_path)
+            test_df = DataTransformation.read_file(self.data_validation_artifact.valid_test_file_path)
 
-            # training dataframe
-            input_feature_train_df = train_df.drop(columns=[TARGET_COLUMN], axis=1)
-            target_feature_train_df = train_df[TARGET_COLUMN]
-            target_feature_train_df = target_feature_train_df.replace(-1, 0)
+            # training dataframe
+            input_feature_train_df = train_df.drop(columns=[TARGET_COLUMN], axis=1)
+            target_feature_train_df = train_df[TARGET_COLUMN]
+            target_feature_train_df = target_feature_train_df.replace(-1, 0)
 
-            # testing dataframe
-            input_feature_test_df = test_df.drop(columns=[TARGET_COLUMN], axis=1)
-            target_feature_test_df = test_df[TARGET_COLUMN]
-            target_feature_test_df = target_feature_test_df.replace(-1, 0)
+            # testing dataframe
+            input_feature_test_df = test_df.drop(columns=[TARGET_COLUMN], axis=1)
+            target_feature_test_df = test_df[TARGET_COLUMN]
+            target_feature_test_df = target_feature_test_df.replace(-1, 0)
 
-            preprocessor = self.get_data_transformer_object()
+            preprocessor = self.get_data_transformer_object()
 
-            preprocessor_obj = preprocessor.fit(input_feature_train_df)
-            transformed_input_train_feature = preprocessor_obj.transform(input_feature_train_df)
-            transformed_input_test_feature = preprocessor_obj.transform(input_feature_test_df)
+            preprocessor_obj = preprocessor.fit(input_feature_train_df)
+            transformed_input_train_feature = preprocessor_obj.transform(input_feature_train_df)
+            transformed_input_test_feature = preprocessor_obj.transform(input_feature_test_df)
 
-            train_arr = np.c_[transformed_input_train_feature, np.array(target_feature_train_df)]
-            test_arr = np.c_[transformed_input_test_feature, np.array(target_feature_test_df)]
-
-            save_object(self.data_transformation_config.transformed_object_file_path, preprocessor_obj)
+            train_arr = np.c_[transformed_input_train_feature, np.array(target_feature_train_df)]
+            test_arr = np.c_[transformed_input_test_feature, np.array(target_feature_test_df)]
 
-            data_transformation_artifact = DataTransformationArtifact(
-                transformed_object_file_path = self.data_transformation_config.transformed_object_file_path,
-                transformed_train_file_path = self.data_transformation_config.transformed_train_file_path,
-                transformed_test_file_path = self.data_transformation_config.transformed_test_file_path
-            )
+            save_object(self.data_transformation_config.transformed_object_file_path, preprocessor_obj)
+            save_numpy_array_data(self.data_transformation_config.transformed_train_file_path, array=train_arr)
+            save_numpy_array_data(self.data_transformation_config.transformed_test_file_path, array=test_arr)
 
-        except CustomException as e:
+            data_transformation_artifact = DataTransformationArtifact(
+                transformed_object_file_path=self.data_transformation_config.transformed_object_file_path,
+                transformed_train_file_path=self.data_transformation_config.transformed_train_file_path,
+                transformed_test_file_path=self.data_transformation_config.transformed_test_file_path
+            )
+
+            return data_transformation_artifact
+
+        except Exception as e:
             raise CustomException(e)

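For quick reference, here is a minimal, self-contained sketch of the fit/transform flow that initiate_data_transformation implements. The toy DataFrame, column names, and imputer parameters below are illustrative stand-ins for the project's data and DATA_TRANSFORMATION_IMPUTER_PARAMS, not code from the repository.

    # Minimal sketch of the KNNImputer pipeline flow; toy data and illustrative parameters.
    import numpy as np
    import pandas as pd
    from sklearn.impute import KNNImputer
    from sklearn.pipeline import Pipeline

    imputer_params = {"missing_values": np.nan, "n_neighbors": 3}    # stand-in for DATA_TRANSFORMATION_IMPUTER_PARAMS

    train_df = pd.DataFrame({
        "f1": [1.0, np.nan, 3.0, 4.0],
        "f2": [0.5, 1.5, np.nan, 2.5],
        "Result": [1, -1, 1, -1],                                    # hypothetical target column
    })

    input_features = train_df.drop(columns=["Result"])
    target = train_df["Result"].replace(-1, 0)                       # map -1 labels to 0, as the component does

    processor = Pipeline([("imputer", KNNImputer(**imputer_params))])
    fitted = processor.fit(input_features)                           # fit on training features only
    transformed = fitted.transform(input_features)                   # NumPy array with NaNs imputed

    train_arr = np.c_[transformed, np.array(target)]                 # features and target side by side
    print(train_arr.shape)                                           # (4, 3)

The real component then persists the fitted pipeline and the combined arrays through the project's save_object and save_numpy_array_data utilities, which are not shown in this commit.
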
NetworkSecurity/components/data_validation.py

Lines changed: 27 additions & 27 deletions
@@ -2,7 +2,7 @@
 from scipy.stats import ks_2samp
 from pathlib import Path
 
-from NetworkSecurity.constant.training_pipeline import SCHEME_FILE_PATH
+from NetworkSecurity.constant.training_pipeline import SCHEMA_FILE_PATH
 from NetworkSecurity.entity.config import DataValidationConfig
 from NetworkSecurity.entity.artifact import DataIngestionArtifact, DataValidationArtifact
 from NetworkSecurity.exception.exception import CustomException
@@ -15,51 +15,52 @@ def __init__(self, data_ingestion_artifact: DataIngestionArtifact, data_validati
         try:
             self.data_ingestion_artifact = data_ingestion_artifact
             self.data_validation_config = data_validation_config
-            self._schema_config = read_yaml_file(SCHEME_FILE_PATH)
+            self._schema_config = read_yaml_file(SCHEMA_FILE_PATH)
 
         except Exception as e:
             raise CustomException(e)
-
+
     @staticmethod
     def read_file(file_path: Path) -> pd.DataFrame:
         try:
             return pd.read_csv(file_path)
+
         except Exception as e:
             raise CustomException(e)
-
+
     def validate_number_of_columns(self, dataframe: pd.DataFrame) -> bool:
         try:
             number_of_columns = len(self._schema_config['columns'])
             logging.info(f"Required Number of columns: {number_of_columns}")
-            logging.info(f"Total number of columns in dataframe: {dataframe.columns}")
+            logging.info(f"Dataframe has {len(dataframe.columns)} columns")
 
-            if(number_of_columns == len(dataframe.columns)):
+            if number_of_columns == len(dataframe.columns):
                 return True
             else:
                 return False
 
         except Exception as e:
             raise CustomException(e)
-
-    def detect_data_drift(self, base_df: pd.DataFrame, current_df: pd.DataFrame, threshold:float = 0.96) -> bool:
+
+    def detect_data_drift(self, base_df: pd.DataFrame, current_df: pd.DataFrame, threshold: float = 0.96) -> bool:
         try:
             drift_status = False
             report = {}
 
-            for columns in base_df.columns:
-                d1 = base_df[columns]
-                d2 = current_df[columns]
+            for column in base_df.columns:
+                d1 = base_df[column]
+                d2 = current_df[column]
                 is_same_dist = ks_2samp(d1, d2)
 
                 if threshold <= is_same_dist.pvalue:
                     is_found = False
                 else:
                     is_found = True
                     drift_status = True
-
-                report.update({columns:{
-                    "p_value":float(is_same_dist.pvalue),
-                    "drift_status":is_found
+
+                report.update({column: {
+                    "p_value": float(is_same_dist.pvalue),
+                    "drift_status": is_found
                 }})
 
             drift_report_file_path = self.data_validation_config.drift_report_file_path
@@ -68,13 +69,12 @@ def detect_data_drift(self, base_df: pd.DataFrame, current_df: pd.DataFrame, thr
             dir_path.mkdir(parents=True, exist_ok=True)
 
             write_yaml_file(drift_report_file_path, report)
-
+
             return drift_status
-
+
         except Exception as e:
             raise CustomException(e)
 
-
     def initiate_data_validation(self) -> DataValidationArtifact:
         try:
             train_file_path = self.data_ingestion_artifact.train_file_path
@@ -83,7 +83,7 @@ def initiate_data_validation(self) -> DataValidationArtifact:
             train_dataframe = DataValidation.read_file(train_file_path)
             test_dataframe = DataValidation.read_file(test_file_path)
 
-            # validate data
+            # validate data
             train_status = self.validate_number_of_columns(train_dataframe)
             test_status = self.validate_number_of_columns(test_dataframe)
 
@@ -106,14 +106,14 @@ def initiate_data_validation(self) -> DataValidationArtifact:
                 self.data_validation_config.valid_data_test_path, index=False, header=True
             )
             data_validation_artifact = DataValidationArtifact(
-                validation_status=status,
-                valid_train_file_path=self.data_ingestion_artifact.train_file_path,
-                valid_test_file_path=self.data_ingestion_artifact.test_file_path,
-                invalid_train_file_path=Path(""), # used instead of None
-                invalid_test_file_path=Path(""),
-                drift_report_file_path=self.data_validation_config.drift_report_file_path,
-            )
-
+                validation_status=status,
+                valid_train_file_path=self.data_validation_config.valid_data_train_path,
+                valid_test_file_path=self.data_validation_config.valid_data_test_path,
+                invalid_train_file_path=Path("."),
+                invalid_test_file_path=Path("."),
+                drift_report_file_path=self.data_validation_config.drift_report_file_path,
+            )
+
             return data_validation_artifact
 
         except Exception as e:

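As a reference for the drift check above, here is a self-contained sketch of the same per-column ks_2samp pattern on synthetic data; the 0.96 threshold mirrors the method's default, and the column names and distributions are made up.

    # Minimal sketch of the per-column Kolmogorov-Smirnov drift check in detect_data_drift.
    import numpy as np
    import pandas as pd
    from scipy.stats import ks_2samp

    rng = np.random.default_rng(42)
    base_df = pd.DataFrame({"a": rng.normal(0, 1, 500), "b": rng.normal(0, 1, 500)})
    current_df = pd.DataFrame({"a": rng.normal(0, 1, 500), "b": rng.normal(0.8, 1, 500)})  # "b" has shifted

    threshold = 0.96
    report, drift_status = {}, False
    for column in base_df.columns:
        result = ks_2samp(base_df[column], current_df[column])
        drifted = result.pvalue < threshold      # same rule as the component: low p-value means drift
        drift_status = drift_status or drifted
        report[column] = {"p_value": float(result.pvalue), "drift_status": drifted}

    print(drift_status, report)

The component writes this report dictionary to a YAML drift report via its write_yaml_file utility and returns the overall drift status.
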
NetworkSecurity/constant/training_pipeline/__init__.py

Lines changed: 3 additions & 3 deletions
@@ -10,15 +10,15 @@
 TRAIN_FILE_NAME: str = 'train.csv'
 TEST_FILE_NAME: str = 'test.csv'
 
-SCHEME_FILE_PATH: Path = Path("data_schema") / "schema.yaml"
+SCHEMA_FILE_PATH: Path = Path("data_schema") / "schema.yaml"
 
 # Data Ingestion Constant
 DATA_INGESTION_COLLECTION_NAME: str = 'NetworkData'
 DATA_INGESTION_DATABASE_NAME: str = 'NetworkDatabase'
 DATA_INGESTION_DIR_NAME: str = 'data_ingestion'
 DATA_INGESTION_FEATURE_STORE_DIR: str = 'feature_store'
 DATA_INGESTION_INGESTED_DIR: str = 'ingested'
-DATA_INGESTION_TEST_TRAIN_SPLIT_RATION: float = 0.2
+DATA_INGESTION_TEST_TRAIN_SPLIT_RATIO: float = 0.2
 
 # Data Validation Constant
 DATA_VALIDATION_DIR_NAME: str = 'data_validation'
@@ -33,7 +33,7 @@
 DATA_TRANSFORMATION_TRANSFORMED_DATA_DIR: str = "transformed"
 DATA_TRANSFORMATION_TRANSFORMED_OBJECT_DIR: str = "transformed_object"
 
-## kkn imputer to replace nan values
+## knn imputer to replace nan values
 DATA_TRANSFORMATION_IMPUTER_PARAMS: dict = {
     "missing_values": np.nan,
     "n_neighbors": 3,

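Purely as an illustration of how the renamed constants might be consumed downstream, here is a hedged sketch; the data ingestion component is not part of this commit, and the use of scikit-learn's train_test_split and the CSV path are assumptions, not code from the repository.

    # Hypothetical consumer of the renamed constants (the ingestion component is not shown in this commit).
    from pathlib import Path

    import pandas as pd
    from sklearn.model_selection import train_test_split

    SCHEMA_FILE_PATH: Path = Path("data_schema") / "schema.yaml"          # as defined above
    DATA_INGESTION_TEST_TRAIN_SPLIT_RATIO: float = 0.2                    # as defined above

    def split_feature_store(df: pd.DataFrame):
        # Sends 20% of the rows to the test split, matching the ratio constant.
        return train_test_split(df, test_size=DATA_INGESTION_TEST_TRAIN_SPLIT_RATIO, random_state=42)

    # Illustrative usage with a made-up path:
    # train_df, test_df = split_feature_store(pd.read_csv("feature_store/NetworkData.csv"))
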
NetworkSecurity/entity/config.py

Lines changed: 16 additions & 10 deletions
@@ -18,7 +18,7 @@
 
 
 class TrainingPipelineConfig:
-    def __init__(self, timestamp:str | None = None):
+    def __init__(self, timestamp: str | None = None):
         try:
             if timestamp is None:
                 timestamp = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
@@ -27,9 +27,10 @@ def __init__(self, timestamp:str | None = None):
             self.artifact_name = training_pipeline.ARTIFACT_DIR
             self.artifact_dir: Path = Path(self.artifact_name) / timestamp
             self.timestamp: str = timestamp
+
         except Exception as e:
             raise CustomException(e)
-
+
 
 class DataIngestionConfig:
     def __init__(self, training_pipeline_config: TrainingPipelineConfig):
@@ -54,20 +55,22 @@ def __init__(self, training_pipeline_config: TrainingPipelineConfig):
                 training_pipeline.TEST_FILE_NAME
             )
 
-            self.train_test_split_ratio = training_pipeline.DATA_INGESTION_TEST_TRAIN_SPLIT_RATION
+            self.train_test_split_ratio = training_pipeline.DATA_INGESTION_TEST_TRAIN_SPLIT_RATIO
             self.collection_name = training_pipeline.DATA_INGESTION_COLLECTION_NAME
             self.database_name = training_pipeline.DATA_INGESTION_DATABASE_NAME
+
         except Exception as e:
             raise CustomException(e)
-
+
+
 class DataValidationConfig:
     def __init__(self, training_pipeline_config: TrainingPipelineConfig):
         try:
             self.data_validation_dir: Path = (
                 training_pipeline_config.artifact_dir /
-                training_pipeline.DATA_VALIDATION_DIR_NAME
+                training_pipeline.DATA_VALIDATION_DIR_NAME
             )
-
+
             self.valid_data_dir: Path = (
                 self.data_validation_dir /
                 training_pipeline.DATA_VALIDATION_VALID_DIR
@@ -79,17 +82,17 @@ def __init__(self, training_pipeline_config: TrainingPipelineConfig):
             )
 
             self.valid_data_train_path: Path = (
-                self.valid_data_dir /
+                self.valid_data_dir /
                 training_pipeline.TRAIN_FILE_NAME
             )
 
             self.valid_data_test_path: Path = (
-                self.valid_data_dir /
+                self.valid_data_dir /
                 training_pipeline.TEST_FILE_NAME
             )
 
             self.invalid_data_train_path: Path = (
-                self.invalid_data_dir /
+                self.invalid_data_dir /
                 training_pipeline.TRAIN_FILE_NAME
             )
 
@@ -103,9 +106,11 @@ def __init__(self, training_pipeline_config: TrainingPipelineConfig):
                 training_pipeline.DATA_VALIDATION_DRIFT_REPORT_DIR /
                 training_pipeline.DATA_VALIDATION_DRIFT_REPORT_FILE_NAME
             )
+
         except Exception as e:
             raise CustomException(e)
 
+
 class DataTransformationConfig:
     def __init__(self, training_pipeline_config: TrainingPipelineConfig):
         try:
@@ -130,6 +135,7 @@ def __init__(self, training_pipeline_config: TrainingPipelineConfig):
                 training_pipeline.DATA_TRANSFORMATION_TRANSFORMED_OBJECT_DIR /
                 training_pipeline.PREPROCESSING_OBJECT_FILE_NAME
             )
+
         except Exception as e:
             raise CustomException(e)
-
+

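To make the path handling in these config classes concrete, here is a standalone sketch of the same pathlib composition; "Artifacts" and "validated" stand in for training_pipeline.ARTIFACT_DIR and DATA_VALIDATION_VALID_DIR, whose values are not shown in this commit.

    # Standalone sketch of the timestamped artifact-directory pattern used in entity/config.py.
    # "Artifacts" and "validated" are assumed stand-ins for constants not visible in this diff.
    from datetime import datetime
    from pathlib import Path

    timestamp = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
    artifact_dir = Path("Artifacts") / timestamp                     # TrainingPipelineConfig.artifact_dir

    data_validation_dir = artifact_dir / "data_validation"           # DATA_VALIDATION_DIR_NAME
    valid_data_train_path = data_validation_dir / "validated" / "train.csv"

    # Parent directories are created before writing, as detect_data_drift does for the drift report.
    valid_data_train_path.parent.mkdir(parents=True, exist_ok=True)
    print(valid_data_train_path)  # e.g. Artifacts/01_01_2025_00_00_00/data_validation/validated/train.csv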