Skip to content

Commit e5b471f

Browse files
authored
Merge pull request #7 from alexhallam/6-add-intercept-and-have-a-new-structure-1
fix issue
2 parents 73d0976 + 97e0fd8 commit e5b471f

File tree

5 files changed

+387
-14
lines changed

5 files changed

+387
-14
lines changed

CHANGELOG.md

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,37 @@
11
# Changelog
22

3+
## [Unreleased]
4+
5+
### ✨ Added
6+
7+
- **Identity Role for Plain Terms**: Added new `Identity` role to `VariableRole` enum for variables that appear as plain terms in formulas (e.g., `x` in `y ~ x`)
8+
- **Intercept Column Support**: Added automatic inclusion of `"intercept"` column in `all_generated_columns` when `has_intercept` is true
9+
- **Formula Order Mapping**: Added new `all_generated_columns_formula_order` field that maps formula order, stored as 1-based string keys (`"1"`, `"2"`, `"3"`, ...), to column names
10+
- **Comprehensive Test Suite**: Added 4 new unit tests to verify intercept handling and formula order mapping functionality
11+
12+
### 🔧 Improved
13+
14+
- **Variable Role Assignment**: Plain terms now correctly receive `Identity` role instead of `FixedEffect` role
15+
- **Generated Columns Preservation**: Variables with `Identity` role now preserve their original column name in generated columns list
16+
- **Intercept Positioning**: Intercept column is automatically inserted at index 1 (after response variable) in `all_generated_columns`
17+
18+
### 🐛 Fixed
19+
20+
- **Issue #4**: Fixed plain terms not receiving proper `Identity` role when appearing alone in formulas
21+
- **Issue #6**: Fixed missing intercept column in `all_generated_columns` and added formula order mapping
22+
23+
### 🧪 Testing
24+
25+
- **Unit Test Coverage**: Added comprehensive test coverage for intercept and formula order functionality
26+
- **Regression Prevention**: Tests ensure intercept is present when `has_intercept` is true and absent when false
27+
- **Order Validation**: Tests verify correct column ordering in both `all_generated_columns` and `all_generated_columns_formula_order`
28+
29+
### 🔄 Internal Changes
30+
31+
- **MetaBuilder Enhancement**: Updated `build()` method to handle intercept insertion and formula order mapping
32+
- **Data Structure Updates**: Enhanced `FormulaMetaData` struct with new `all_generated_columns_formula_order` field
33+
- **Role Management**: Improved role assignment logic in `push_plain_term()` and `add_transformation()` methods
34+
335
## [0.2.4] - 2025-09-05
436

537
### Added

src/internal/data_structures.rs

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -448,7 +448,15 @@ pub struct FormulaMetadataInfo {
448448
/// family: Some("gaussian".to_string())
449449
/// },
450450
/// columns,
451-
/// all_generated_columns: vec!["y".to_string(), "x".to_string(), "group".to_string()]
451+
/// all_generated_columns: vec!["y".to_string(), "intercept".to_string(), "x".to_string(), "group".to_string()],
452+
/// all_generated_columns_formula_order: {
453+
/// let mut map = HashMap::new();
454+
/// map.insert("1".to_string(), "y".to_string());
455+
/// map.insert("2".to_string(), "intercept".to_string());
456+
/// map.insert("3".to_string(), "x".to_string());
457+
/// map.insert("4".to_string(), "group".to_string());
458+
/// map
459+
/// }
452460
/// };
453461
/// ```
454462
#[derive(Debug, Serialize, Deserialize, Clone)]
@@ -462,8 +470,31 @@ pub struct FormulaMetaData {
462470
/// Detailed information about each variable
463471
pub columns: HashMap<String, VariableInfo>,
464472

465-
/// All generated column names ordered by variable ID
473+
/// All generated column names ordered by variable ID; when the formula has an intercept, `"intercept"` is inserted at index 1 (right after the response column)
466474
pub all_generated_columns: Vec<String>,
475+
476+
/// Mapping of formula order to column names
477+
///
478+
/// This field provides a mapping from formula order (as string keys "1", "2", etc.)
479+
/// to the corresponding column names. The order follows the formula structure:
480+
/// 1. Response variable
481+
/// 2. Intercept (if present)
482+
/// 3. Remaining variables in ascending variable-ID order, each contributing all of its generated columns
483+
///
484+
/// # Examples
485+
///
486+
/// For formula `y ~ x + poly(x, 2) + log(z)`:
487+
/// ```json
488+
/// {
489+
/// "1": "y",
490+
/// "2": "intercept",
491+
/// "3": "x",
492+
/// "4": "x_poly_1",
493+
/// "5": "x_poly_2",
494+
/// "6": "z_log"
495+
/// }
496+
/// ```
497+
pub all_generated_columns_formula_order: HashMap<String, String>,
467498
}
468499

469500
// Legacy structures for backward compatibility

src/internal/meta_builder.rs

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ impl MetaBuilder {
234234
pub fn add_transformation(&mut self, name: &str, transformation: Transformation) {
235235
if let Some(var_info) = self.columns.get_mut(name) {
236236
var_info.transformations.push(transformation.clone());
237-
237+
238238
// If the variable has an Identity role, preserve the original variable name
239239
// and add the transformation's generated columns
240240
if var_info.roles.contains(&VariableRole::Identity) {
@@ -670,10 +670,47 @@ impl MetaBuilder {
670670
let mut sorted_vars: Vec<_> = self.columns.values().collect();
671671
sorted_vars.sort_by_key(|v| v.id);
672672

673-
for var in sorted_vars {
673+
for var in &sorted_vars {
674674
all_generated_columns.extend(var.generated_columns.clone());
675675
}
676676

677+
// Add intercept column if has_intercept is true
678+
if has_intercept {
679+
all_generated_columns.insert(1, "intercept".to_string()); // Insert after response (index 1)
680+
}
681+
682+
// Generate all_generated_columns_formula_order mapping
683+
let mut all_generated_columns_formula_order = std::collections::HashMap::new();
684+
let mut order_index = 1;
685+
686+
// Add response variable (always first)
687+
if let Some(response_var) = sorted_vars.iter().find(|v| v.id == 1) {
688+
if let Some(response_col) = response_var.generated_columns.first() {
689+
all_generated_columns_formula_order
690+
.insert(order_index.to_string(), response_col.clone());
691+
order_index += 1;
692+
}
693+
}
694+
695+
// Add intercept if present
696+
if has_intercept {
697+
all_generated_columns_formula_order
698+
.insert(order_index.to_string(), "intercept".to_string());
699+
order_index += 1;
700+
}
701+
702+
// Add all other variables in order
703+
for var in &sorted_vars {
704+
if var.id != 1 {
705+
// Skip response (already added)
706+
for col in &var.generated_columns {
707+
all_generated_columns_formula_order
708+
.insert(order_index.to_string(), col.clone());
709+
order_index += 1;
710+
}
711+
}
712+
}
713+
677714
crate::internal::data_structures::FormulaMetaData {
678715
formula: input.to_string(),
679716
metadata: FormulaMetadataInfo {
@@ -684,6 +721,7 @@ impl MetaBuilder {
684721
},
685722
columns: self.columns,
686723
all_generated_columns,
724+
all_generated_columns_formula_order,
687725
}
688726
}
689727
}

0 commit comments

Comments
 (0)