Skip to content

Commit 8434ba2

Browse files
committed
ain
1 parent 068a649 commit 8434ba2

File tree

3 files changed

+84
-2
lines changed

3 files changed

+84
-2
lines changed

.kiro/specs/neurolite-data-detection/tasks.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -186,8 +186,11 @@
186186
- Write unit tests for deep learning model suggestions
187187
- _Requirements: 9.2_
188188

189-
- [ ] 9. Build PreprocessingRecommender for data preparation
190-
- [ ] 9.1 Implement preprocessing pipeline recommendations
189+
- [-] 9. Build PreprocessingRecommender for data preparation
190+
191+
192+
- [-] 9.1 Implement preprocessing pipeline recommendations
193+
191194
- Create normalization and standardization need detection
192195
- Add encoding requirement identification algorithms
193196
- Implement feature scaling necessity assessment

neurolite/core/data_models.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,80 @@ def __post_init__(self):
335335
raise ValueError("Seasonality period must be positive")
336336

337337

338+
# Preprocessing recommendation data models
339+
340+
@dataclass
class ScalingRecommendation:
    """Recommendation for feature scaling strategies.

    All fields are validated in ``__post_init__``; constructing an instance
    with an invalid value raises ``ValueError``.
    """

    # Runtime mirror of the ``Literal`` annotation on ``scaling_type``.
    # Left unannotated on purpose so ``@dataclass`` does not turn it into a field.
    _VALID_SCALING_TYPES = ('standardization', 'normalization', 'robust_scaling', 'none')

    scaling_type: Literal['standardization', 'normalization', 'robust_scaling', 'none']
    rationale: str
    confidence: float
    affected_columns: List[str] = field(default_factory=list)
    parameters: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Validate ScalingRecommendation data after initialization.

        Raises:
            ValueError: If ``confidence`` is outside ``[0.0, 1.0]``, if
                ``rationale`` is empty or whitespace-only, or if
                ``scaling_type`` is not one of the supported values.
        """
        if not 0.0 <= self.confidence <= 1.0:
            raise ValueError("Confidence must be between 0.0 and 1.0")
        # A rationale made only of whitespace carries no information, so it
        # is treated the same as an empty one.
        if not self.rationale or not str(self.rationale).strip():
            raise ValueError("Rationale cannot be empty")
        # ``Literal`` is only enforced by static checkers; verify at runtime.
        # Checked last so earlier error messages keep their precedence.
        if self.scaling_type not in self._VALID_SCALING_TYPES:
            raise ValueError(
                f"Scaling type must be one of {self._VALID_SCALING_TYPES}, "
                f"got {self.scaling_type!r}"
            )
355+
356+
357+
@dataclass
class EncodingRecommendation:
    """Recommendation for categorical encoding strategies.

    All fields are validated in ``__post_init__``; constructing an instance
    with an invalid value raises ``ValueError``.
    """

    # Runtime mirror of the ``Literal`` annotation on ``encoding_type``.
    # Left unannotated on purpose so ``@dataclass`` does not turn it into a field.
    _VALID_ENCODING_TYPES = ('one_hot', 'label_encoding', 'target_encoding', 'binary_encoding', 'none')

    encoding_type: Literal['one_hot', 'label_encoding', 'target_encoding', 'binary_encoding', 'none']
    rationale: str
    confidence: float
    affected_columns: List[str] = field(default_factory=list)
    parameters: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Validate EncodingRecommendation data after initialization.

        Raises:
            ValueError: If ``confidence`` is outside ``[0.0, 1.0]``, if
                ``rationale`` is empty or whitespace-only, or if
                ``encoding_type`` is not one of the supported values.
        """
        if not 0.0 <= self.confidence <= 1.0:
            raise ValueError("Confidence must be between 0.0 and 1.0")
        # A rationale made only of whitespace carries no information, so it
        # is treated the same as an empty one.
        if not self.rationale or not str(self.rationale).strip():
            raise ValueError("Rationale cannot be empty")
        # ``Literal`` is only enforced by static checkers; verify at runtime.
        # Checked last so earlier error messages keep their precedence.
        if self.encoding_type not in self._VALID_ENCODING_TYPES:
            raise ValueError(
                f"Encoding type must be one of {self._VALID_ENCODING_TYPES}, "
                f"got {self.encoding_type!r}"
            )
372+
373+
374+
@dataclass
class FeatureEngineeringRecommendation:
    """Recommendation for feature engineering strategies.

    All fields are validated in ``__post_init__``; constructing an instance
    with an invalid value raises ``ValueError``.
    """

    technique: str
    rationale: str
    confidence: float
    affected_columns: List[str] = field(default_factory=list)
    parameters: Dict[str, Any] = field(default_factory=dict)
    expected_benefit: str = ""

    def __post_init__(self):
        """Validate FeatureEngineeringRecommendation data after initialization.

        Raises:
            ValueError: If ``confidence`` is outside ``[0.0, 1.0]``, or if
                ``rationale`` or ``technique`` is empty or whitespace-only.
        """
        if not 0.0 <= self.confidence <= 1.0:
            raise ValueError("Confidence must be between 0.0 and 1.0")
        # Whitespace-only text carries no information, so it is treated the
        # same as an empty string for both required text fields.
        if not self.rationale or not str(self.rationale).strip():
            raise ValueError("Rationale cannot be empty")
        if not self.technique or not str(self.technique).strip():
            raise ValueError("Technique cannot be empty")
392+
393+
394+
@dataclass
class PreprocessingPipeline:
    """Complete preprocessing pipeline recommendation.

    Groups the scaling, encoding and feature engineering recommendations
    together with a step ordering, an overall confidence and an estimated
    processing time. Every field has a default, so an empty pipeline can be
    created with no arguments.
    """

    scaling_recommendations: List[ScalingRecommendation] = field(default_factory=list)
    encoding_recommendations: List[EncodingRecommendation] = field(default_factory=list)
    feature_engineering_recommendations: List[FeatureEngineeringRecommendation] = field(default_factory=list)
    pipeline_order: List[str] = field(default_factory=list)
    overall_confidence: float = 0.0
    estimated_processing_time: float = 0.0

    def __post_init__(self):
        """Validate PreprocessingPipeline data after initialization.

        Raises:
            ValueError: If ``overall_confidence`` is outside ``[0.0, 1.0]``
                or ``estimated_processing_time`` is negative.
        """
        confidence_in_range = 0.0 <= self.overall_confidence <= 1.0
        if not confidence_in_range:
            raise ValueError("Overall confidence must be between 0.0 and 1.0")

        time_is_negative = self.estimated_processing_time < 0
        if time_is_negative:
            raise ValueError("Estimated processing time cannot be negative")
410+
411+
338412
# Task detection specific data models
339413

340414
@dataclass
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
"""
2+
Preprocessing recommender for data preparation strategies.
3+
4+
This module provides functionality to recommend appropriate preprocessing steps
5+
including normalizati

0 commit comments

Comments
 (0)