@@ -335,6 +335,80 @@ def __post_init__(self):
335335 raise ValueError ("Seasonality period must be positive" )
336336
337337
338+ # Preprocessing recommendation data models
339+
@dataclass
class ScalingRecommendation:
    """Recommendation for a feature scaling strategy.

    Attributes:
        scaling_type: Scaling strategy; must be one of the values listed in
            the ``Literal`` annotation.
        rationale: Non-empty explanation for the recommendation.
        confidence: Score in the closed interval [0.0, 1.0].
        affected_columns: Column names attached to the recommendation
            (fresh empty list by default).
        parameters: Extra free-form settings (fresh empty dict by default).

    Raises:
        ValueError: If ``confidence`` is outside [0.0, 1.0], ``rationale`` is
            empty, or ``scaling_type`` is not a supported value.
    """

    # Deliberately unannotated so the dataclass machinery treats this as a
    # plain class constant rather than an instance field.
    _VALID_SCALING_TYPES = ('standardization', 'normalization', 'robust_scaling', 'none')

    scaling_type: Literal['standardization', 'normalization', 'robust_scaling', 'none']
    rationale: str
    confidence: float
    affected_columns: List[str] = field(default_factory=list)
    parameters: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Validate ScalingRecommendation data after initialization."""
        if not 0.0 <= self.confidence <= 1.0:
            raise ValueError("Confidence must be between 0.0 and 1.0")
        if not self.rationale:
            raise ValueError("Rationale cannot be empty")
        # ``Literal`` is only a static hint and is not enforced at runtime, so
        # the allowed values are checked explicitly. The check comes last so
        # inputs that already failed keep raising the same error as before.
        if self.scaling_type not in self._VALID_SCALING_TYPES:
            raise ValueError(
                f"Invalid scaling type: {self.scaling_type!r}. "
                f"Must be one of {self._VALID_SCALING_TYPES}"
            )
355+
356+
@dataclass
class EncodingRecommendation:
    """Recommendation for a categorical encoding strategy.

    Attributes:
        encoding_type: Encoding strategy; must be one of the values listed in
            the ``Literal`` annotation.
        rationale: Non-empty explanation for the recommendation.
        confidence: Score in the closed interval [0.0, 1.0].
        affected_columns: Column names attached to the recommendation
            (fresh empty list by default).
        parameters: Extra free-form settings (fresh empty dict by default).

    Raises:
        ValueError: If ``confidence`` is outside [0.0, 1.0], ``rationale`` is
            empty, or ``encoding_type`` is not a supported value.
    """

    # Deliberately unannotated so the dataclass machinery treats this as a
    # plain class constant rather than an instance field.
    _VALID_ENCODING_TYPES = (
        'one_hot', 'label_encoding', 'target_encoding', 'binary_encoding', 'none',
    )

    encoding_type: Literal['one_hot', 'label_encoding', 'target_encoding', 'binary_encoding', 'none']
    rationale: str
    confidence: float
    affected_columns: List[str] = field(default_factory=list)
    parameters: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Validate EncodingRecommendation data after initialization."""
        if not 0.0 <= self.confidence <= 1.0:
            raise ValueError("Confidence must be between 0.0 and 1.0")
        if not self.rationale:
            raise ValueError("Rationale cannot be empty")
        # ``Literal`` is only a static hint and is not enforced at runtime, so
        # the allowed values are checked explicitly. The check comes last so
        # inputs that already failed keep raising the same error as before.
        if self.encoding_type not in self._VALID_ENCODING_TYPES:
            raise ValueError(
                f"Invalid encoding type: {self.encoding_type!r}. "
                f"Must be one of {self._VALID_ENCODING_TYPES}"
            )
372+
373+
@dataclass
class FeatureEngineeringRecommendation:
    """A single suggested feature engineering step.

    Attributes:
        technique: Non-empty name of the suggested technique.
        rationale: Non-empty explanation for the suggestion.
        confidence: Score in the closed interval [0.0, 1.0].
        affected_columns: Column names attached to the suggestion
            (fresh empty list by default).
        parameters: Extra free-form settings (fresh empty dict by default).
        expected_benefit: Optional free-text description; defaults to "".

    Raises:
        ValueError: If ``confidence`` is outside [0.0, 1.0], or if
            ``rationale`` or ``technique`` is empty.
    """

    technique: str
    rationale: str
    confidence: float
    affected_columns: List[str] = field(default_factory=list)
    parameters: Dict[str, Any] = field(default_factory=dict)
    expected_benefit: str = ""

    def __post_init__(self):
        """Check field invariants once the generated ``__init__`` has run."""
        within_unit_interval = 0.0 <= self.confidence <= 1.0
        if not within_unit_interval:
            raise ValueError("Confidence must be between 0.0 and 1.0")

        # Required text fields, checked in order: rationale, then technique.
        required_text = (
            (self.rationale, "Rationale cannot be empty"),
            (self.technique, "Technique cannot be empty"),
        )
        for value, message in required_text:
            if not value:
                raise ValueError(message)
392+
393+
@dataclass
class PreprocessingPipeline:
    """Complete preprocessing pipeline recommendation.

    Attributes:
        scaling_recommendations: Scaling recommendations in the pipeline.
        encoding_recommendations: Encoding recommendations in the pipeline.
        feature_engineering_recommendations: Feature engineering
            recommendations in the pipeline.
        pipeline_order: List of step identifiers (strings); its contents are
            not validated here.
        overall_confidence: Score in the closed interval [0.0, 1.0].
        estimated_processing_time: Non-negative estimate; the unit is not
            defined by this class.

    All list fields default to a fresh empty list and both numeric fields
    default to 0.0.

    Raises:
        ValueError: If ``overall_confidence`` is outside [0.0, 1.0] or
            ``estimated_processing_time`` is negative or NaN.
    """

    scaling_recommendations: List[ScalingRecommendation] = field(default_factory=list)
    encoding_recommendations: List[EncodingRecommendation] = field(default_factory=list)
    feature_engineering_recommendations: List[FeatureEngineeringRecommendation] = field(default_factory=list)
    pipeline_order: List[str] = field(default_factory=list)
    overall_confidence: float = 0.0
    estimated_processing_time: float = 0.0

    def __post_init__(self):
        """Validate PreprocessingPipeline data after initialization."""
        if not 0.0 <= self.overall_confidence <= 1.0:
            raise ValueError("Overall confidence must be between 0.0 and 1.0")
        # Written as ``not (x >= 0)`` rather than ``x < 0`` so that NaN, for
        # which every ordering comparison is False, is rejected too. This
        # matches how the chained comparison above already treats NaN.
        if not self.estimated_processing_time >= 0:
            raise ValueError("Estimated processing time cannot be negative")
410+
411+
338412# Task detection specific data models
339413
340414@dataclass
0 commit comments