refactor: sort collections and improve determinism

JegernOUTT · JegernOUTT · commit ddf26195a3f8 · 2026-02-11T00:05:04.000+10:30
- Sort paths and directories in files_in_workspace for consistent trie building
- Sort context paths in pp_utils for stable processing order
- Replace HashMap with BTreeMap in pp_tool_results for sorted iteration
- Deduplicate customization modes by id and merge in mode overrides ahead of the title/id sort to ensure consistent UI order
- Enhance model pattern matching with canonical names and wildcard support
- Change default reasoning effort from High to Medium when boost_reasoning is enabled
- Update anthropic test comments and assertion to reference DEFAULT_THINKING_BUDGET instead of hard-coded values
diff --git a/refact-agent/engine/src/chat/prepare.rs b/refact-agent/engine/src/chat/prepare.rs
@@ -324,7 +324,7 @@ fn adapt_sampling_for_reasoning_models(
                 && model_record.supports_boost_reasoning
                 && sampling_parameters.boost_reasoning
             {
-                sampling_parameters.reasoning_effort = Some(ReasoningEffort::High);
+                sampling_parameters.reasoning_effort = Some(ReasoningEffort::Medium);
             }
             sampling_parameters.thinking = None;
             sampling_parameters.enable_thinking = None;
diff --git a/refact-agent/engine/src/files_in_workspace.rs b/refact-agent/engine/src/files_in_workspace.rs
@@ -165,20 +165,22 @@ impl CacheCorrection {
     }
 
     pub fn build(paths: &Vec<PathBuf>, workspace_folders: &Vec<PathBuf>) -> CacheCorrection {
-        let filenames = PathTrie::build(&paths, &workspace_folders);
-        // TODO: I'm not sure how directories should be collected
-        let directories: Vec<PathBuf> = {
+        let mut sorted_paths = paths.clone();
+        sorted_paths.sort();
+
+        let filenames = PathTrie::build(&sorted_paths, &workspace_folders);
+
+        let mut directories: Vec<PathBuf> = {
             let mut unique_directories = HashSet::new();
-            for p in paths.iter() {
+            for p in sorted_paths.iter() {
                 if let Some(parent) = p.parent() {
                     unique_directories.insert(parent);
                 }
             }
-            unique_directories
-                .iter()
-                .map(|p| PathBuf::from(p))
-                .collect()
+            unique_directories.iter().map(|p| PathBuf::from(p)).collect()
         };
+        directories.sort();
+
         let directories = PathTrie::build(&directories, &workspace_folders);
         CacheCorrection {
             filenames,
diff --git a/refact-agent/engine/src/http/routers/v1/customization_editor.rs b/refact-agent/engine/src/http/routers/v1/customization_editor.rs
@@ -11,6 +11,8 @@ use crate::files_correction::get_project_dirs;
 use crate::global_context::GlobalContext;
 use crate::yaml_configs::customization_registry::{load_merged_registry, load_registry_from_dir, invalidate_all_registry_caches, ConfigScope};
 use crate::yaml_configs::customization_types::*;
+use crate::yaml_configs::project_configs_bootstrap::{global_configs_try_create_all, project_configs_ensure_dirs};
+
 
 fn json_error(status: StatusCode, msg: &str) -> Result<Response<Body>, ScratchError> {
     let body = serde_json::json!({"error": msg});
@@ -81,9 +83,13 @@ pub async fn handle_v1_customization_registry(
     let dirs = get_project_dirs(gcx.clone()).await;
     let project_root = dirs.first().cloned();
 
+    let _ = global_configs_try_create_all(&config_dir).await;
+    if let Some(ref root) = project_root {
+        let _ = project_configs_ensure_dirs(root).await;
+    }
+
     let registry = load_merged_registry(&config_dir, project_root.as_deref()).await;
     let _global_registry = load_registry_from_dir(&config_dir).await;
-
     let local_refact_dir = project_root.as_ref().map(|p| p.join(".refact"));
 
     let make_config_item = |id: &str, kind: &str, title: &str, specific: bool| -> ConfigItem {
@@ -111,9 +117,31 @@ pub async fn handle_v1_customization_registry(
         }
     };
 
-    let mut modes: Vec<_> = registry.modes.values().map(|m| {
-        make_config_item(&m.id, "modes", if m.title.is_empty() { &m.id } else { &m.title }, m.specific)
-    }).collect();
+    let mut modes: Vec<_> = Vec::new();
+    let mut seen_mode_ids: std::collections::HashSet<String> = std::collections::HashSet::new();
+
+    for m in registry.modes.values() {
+        if seen_mode_ids.insert(m.id.clone()) {
+            modes.push(make_config_item(
+                &m.id,
+                "modes",
+                if m.title.is_empty() { &m.id } else { &m.title },
+                m.specific,
+            ));
+        }
+    }
+
+    for m in &registry.mode_overrides {
+        if seen_mode_ids.insert(m.id.clone()) {
+            modes.push(make_config_item(
+                &m.id,
+                "modes",
+                if m.title.is_empty() { &m.id } else { &m.title },
+                m.specific,
+            ));
+        }
+    }
+
     modes.sort_by(|a, b| a.title.cmp(&b.title).then_with(|| a.id.cmp(&b.id)));
 
     let mut subagents: Vec<_> = registry.subagents.values().map(|s| {
diff --git a/refact-agent/engine/src/llm/adapters/anthropic.rs b/refact-agent/engine/src/llm/adapters/anthropic.rs
@@ -981,13 +981,13 @@ mod tests {
             "claude".to_string(),
             vec![ChatMessage::new("user".to_string(), "test".to_string())],
         );
-        req_low_max.params.max_tokens = 4096;  // Less than DEFAULT_THINKING_BUDGET (10000)
+        req_low_max.params.max_tokens = 4096;  // Less than DEFAULT_THINKING_BUDGET
         req_low_max.reasoning = ReasoningIntent::High;  // Will use DEFAULT_THINKING_BUDGET
         req_low_max.stream = true;
 
         let http = adapter.build_http(&req_low_max, &settings()).unwrap();
-        // Should be adjusted: 10000 + max(4096, 1024) = 14096
-        assert_eq!(http.body["max_tokens"], 14096);
+        // Should be adjusted: budget + max(current_max, 1024)
+        assert_eq!(http.body["max_tokens"], DEFAULT_THINKING_BUDGET + 4096);
         assert_eq!(http.body["thinking"]["budget_tokens"], DEFAULT_THINKING_BUDGET);
 
         // Test with max_tokens > thinking budget (should NOT be adjusted)
diff --git a/refact-agent/engine/src/postprocessing/pp_tool_results.rs b/refact-agent/engine/src/postprocessing/pp_tool_results.rs
@@ -1,4 +1,5 @@
-use std::collections::HashMap;
+use std::collections::BTreeMap;
+
 use std::path::PathBuf;
 use std::sync::Arc;
 use tokenizers::Tokenizer;
@@ -104,7 +105,7 @@ fn deduplicate_and_merge_context_files(
     context_files: Vec<ContextFile>,
     existing_messages: &[ChatMessage],
 ) -> (Vec<ContextFile>, Vec<String>) {
-    let mut file_groups: HashMap<String, Vec<ContextFile>> = HashMap::new();
+    let mut file_groups: BTreeMap<String, Vec<ContextFile>> = BTreeMap::new();
 
     for cf in context_files {
         let canonical = canonical_path(&cf.file_name).to_string_lossy().to_string();
diff --git a/refact-agent/engine/src/postprocessing/pp_utils.rs b/refact-agent/engine/src/postprocessing/pp_utils.rs
@@ -148,8 +148,9 @@ pub async fn pp_resolve_ctx_file_paths(
         }
         unique_cpaths.insert(context_file.file_name.clone());
     }
+    let mut unique_cpaths_vec: Vec<String> = unique_cpaths.into_iter().collect();
+    unique_cpaths_vec.sort();
 
-    let unique_cpaths_vec: Vec<String> = unique_cpaths.into_iter().collect();
     let shortified_vec: Vec<String> = shortify_paths(gcx.clone(), &unique_cpaths_vec).await;
     unique_cpaths_vec
         .into_iter()
diff --git a/refact-agent/engine/src/yaml_configs/customization_registry.rs b/refact-agent/engine/src/yaml_configs/customization_registry.rs
@@ -362,14 +362,54 @@ pub fn resolve_subagent_for_model(
 }
 
 fn model_matches_pattern(model_id: &str, pattern: &str) -> bool {
+    let canonical = crate::caps::model_caps::canonicalize_model_name(model_id);
+    let candidates = [
+        canonical.original.as_str(),
+        canonical.provider_stripped.as_str(),
+        canonical.base_model.as_str(),
+        canonical.last_segment.as_str(),
+        canonical.last_segment_base.as_str(),
+    ];
+
+    candidates.iter().any(|c| model_matches_pattern_single(c, pattern))
+        || {
+            let pattern_norm = normalize_model_match_str(pattern);
+            candidates
+                .iter()
+                .any(|c| model_matches_pattern_single(&normalize_model_match_str(c), &pattern_norm))
+        }
+}
+
+fn normalize_model_match_str(s: &str) -> String {
+    s.to_lowercase().replace('.', "-")
+}
+
+fn model_matches_pattern_single(model_id: &str, pattern: &str) -> bool {
     if pattern == "*" {
         return true;
     }
-    if pattern.ends_with("*") {
+
+    if !pattern.contains('*') {
+        return model_id == pattern;
+    }
+
+    if pattern.ends_with('*') {
         let prefix = &pattern[..pattern.len() - 1];
         return model_id.starts_with(prefix);
     }
-    model_id == pattern
+
+    if pattern.starts_with('*') {
+        let suffix = &pattern[1..];
+        return model_id.ends_with(suffix);
+    }
+
+    if let Some(star_pos) = pattern.find('*') {
+        let prefix = &pattern[..star_pos];
+        let suffix = &pattern[star_pos + 1..];
+        return model_id.starts_with(prefix) && model_id.ends_with(suffix);
+    }
+
+    false
 }
 
 pub fn match_tool_confirm_action(rules: &[ToolConfirmRule], tool_name: &str) -> Option<String> {
@@ -504,6 +544,11 @@ mod tests {
 
     #[test]
     fn test_model_matches_pattern_prefix() {
+        assert!(model_matches_pattern("openai/gpt-4o", "gpt-4*"));
+        assert!(model_matches_pattern("openrouter/openai/gpt-4o", "gpt-4*"));
+        assert!(model_matches_pattern("claude-3.7-sonnet", "claude-3-7*"));
+        assert!(model_matches_pattern("anthropic/claude-3.7-sonnet", "claude-3-7*"));
+
         assert!(model_matches_pattern("gpt-4o", "gpt-*"));
         assert!(model_matches_pattern("gpt-4-turbo", "gpt-*"));
         assert!(!model_matches_pattern("claude-3", "gpt-*"));

Original file line number	Diff line number	Diff line change
`@@ -324,7 +324,7 @@ fn adapt_sampling_for_reasoning_models(`
`324`	`324`	`&& model_record.supports_boost_reasoning`
`325`	`325`	`&& sampling_parameters.boost_reasoning`
`326`	`326`	`{`
`327`		`- sampling_parameters.reasoning_effort = Some(ReasoningEffort::High);`
	`327`	`+ sampling_parameters.reasoning_effort = Some(ReasoningEffort::Medium);`
`328`	`328`	`}`
`329`	`329`	`sampling_parameters.thinking = None;`
`330`	`330`	`sampling_parameters.enable_thinking = None;`
Original file line number	Diff line number	Diff line change
`@@ -148,8 +148,9 @@ pub async fn pp_resolve_ctx_file_paths(`
`148`	`148`	`}`
`149`	`149`	`unique_cpaths.insert(context_file.file_name.clone());`
`150`	`150`	`}`
	`151`	`+ let mut unique_cpaths_vec: Vec<String> = unique_cpaths.into_iter().collect();`
	`152`	`+ unique_cpaths_vec.sort();`
`151`	`153`
`152`		`- let unique_cpaths_vec: Vec<String> = unique_cpaths.into_iter().collect();`
`153`	`154`	`let shortified_vec: Vec<String> = shortify_paths(gcx.clone(), &unique_cpaths_vec).await;`
`154`	`155`	`unique_cpaths_vec`
`155`	`156`	`.into_iter()`