diff --git a/README.md b/README.md
index 74d23d2..eabd01b 100644
--- a/README.md
+++ b/README.md
@@ -29,20 +29,23 @@ arc_solver_project/
 │   ├── dsl.py                 # Domain-specific language primitives
 │   ├── heuristics.py          # Heuristic rule inference
 │   ├── search.py              # Basic brute-force search
-│   ├── solver.py              # Main solver interface (enhanced)
-│   ├── enhanced_solver.py     # Enhanced solver with neural components
+│   ├── solver.py              # Main solver interface with enhancements
 │   ├── enhanced_search.py     # Neural-guided program synthesis
+│   ├── features.py            # Task feature extraction
+│   ├── ttt.py                 # Test-time training utilities
 │   ├── io_utils.py            # JSON loading and submission helpers
-│   └── neural/                # Neural guidance components
-│       ├── features.py        # Task feature extraction
+│   └── neural/                # Neural components
 │       ├── guidance.py        # Neural operation prediction
-│       ├── sketches.py        # Program sketch mining
 │       ├── episodic.py        # Episodic retrieval system
-│       └── ttt.py             # Test-time training
+│       └── sketches.py        # Program sketch mining
 │
 ├── arc_submit.py              # Command-line submission script
-├── train_neural_guidance.py   # Training script for neural components
-├── benchmark.py               # Benchmarking and evaluation tools
+├── tools/                     # Training and benchmarking utilities
+│   ├── train_guidance.py
+│   ├── mine_sketches.py
+│   ├── build_memory.py
+│   └── benchmark.py
+├── tests/                     # Unit and integration tests
 └── README.md                  # This file
 ```
 
@@ -62,22 +65,21 @@ ARC_USE_BASELINE=1 python arc_submit.py
 
 ```bash
 # Train neural guidance (requires training data)
-python train_neural_guidance.py
+python tools/train_guidance.py
 
 # Or setup environment with defaults
-python benchmark.py
+python tools/benchmark.py
 ```
 
 ### 3. Python API
 
 ```python
-from arc_solver.enhanced_solver import solve_task_enhanced
+from arc_solver.solver import solve_task_enhanced, ARCSolver
 
 # Solve a single task with full enhancements
 result = solve_task_enhanced(task)
 
 # Configure solver behavior
-from arc_solver.enhanced_solver import ARCSolver
 solver = ARCSolver(use_enhancements=True)
 result = solver.solve_task(task)
 ```
diff --git a/arc_solver/enhanced_search.py b/arc_solver/enhanced_search.py
index 2abf7a8..4f7a56c 100644
--- a/arc_solver/enhanced_search.py
+++ b/arc_solver/enhanced_search.py
@@ -15,9 +15,9 @@
 from .grid import Array, eq
 from .dsl import OPS, apply_program
 from .heuristics import consistent_program_single_step, score_candidate, diversify_programs
-from .guidance import NeuralGuidance
-from .memory import EpisodicRetrieval
-from .sketches import SketchMiner, generate_parameter_grid
+from .neural.guidance import NeuralGuidance
+from .neural.episodic import EpisodicRetrieval
+from .neural.sketches import SketchMiner, generate_parameter_grid
 from .ttt import TestTimeTrainer, DataAugmentation
 
 
diff --git a/arc_solver/enhanced_solver.py b/arc_solver/enhanced_solver.py
deleted file mode 100644
index 3c7e7db..0000000
--- a/arc_solver/enhanced_solver.py
+++ /dev/null
@@ -1,214 +0,0 @@
-"""
-Enhanced top-level solver interface for ARC tasks.
-
-This module integrates the enhanced search capabilities including neural guidance,
-episodic retrieval, program sketches, and test-time training to provide better
-solutions for ARC tasks.
-"""
-
-from __future__ import annotations
-
-from typing import Any, Dict, List, Optional, Tuple
-import numpy as np
-import os
-
-from .grid import to_array, to_list, Array
-from .search import (
-    synthesize as synth_baseline,
-    predict_two as predict_two_baseline,
-)
-from .enhanced_search import synthesize_with_enhancements, predict_two_enhanced
-
-
-class ARCSolver:
-    """Enhanced ARC solver with neural components and episodic memory."""
-    
-    def __init__(self, use_enhancements: bool = True,
-                 guidance_model_path: str = None,
-                 episode_db_path: str = "episodes.json"):
-        self.use_enhancements = use_enhancements
-        self.guidance_model_path = guidance_model_path
-        self.episode_db_path = episode_db_path
-        self.stats = {
-            'tasks_solved': 0,
-            'total_tasks': 0,
-            'enhancement_success_rate': 0.0,
-            'fallback_used': 0,
-        }
-        self._last_outputs: Optional[Tuple[List[List[List[int]]], List[List[List[int]]]]] = None
-    
-    def solve_task(self, task: Dict[str, List[Dict[str, List[List[int]]]]]) -> Dict[str, List[List[List[int]]]]:
-        """Solve a single ARC task using enhanced or baseline methods."""
-        self.stats['total_tasks'] += 1
-        
-        # Extract training pairs as numpy arrays
-        train_pairs: List[Tuple[Array, Array]] = []
-        for pair in task["train"]:
-            a = to_array(pair["input"])
-            b = to_array(pair["output"])
-            train_pairs.append((a, b))
-        
-        # Extract test inputs
-        test_inputs: List[Array] = []
-        for pair in task["test"]:
-            test_inputs.append(to_array(pair["input"]))
-        
-        # Try enhanced synthesis first, fall back to baseline if needed
-        try:
-            if self.use_enhancements:
-                progs = synthesize_with_enhancements(train_pairs)
-                attempts = predict_two_enhanced(progs, test_inputs)
-                
-                # Check if we got a reasonable solution
-                if self._validate_solution(attempts, test_inputs):
-                    self.stats['tasks_solved'] += 1
-                    return {
-                        "attempt_1": [to_list(arr) for arr in attempts[0]],
-                        "attempt_2": [to_list(arr) for arr in attempts[1]],
-                    }
-                else:
-                    # Enhancement didn't work, try fallback
-                    self.stats['fallback_used'] += 1
-                    raise Exception("Enhanced search failed validation")
-            else:
-                raise Exception("Enhancements disabled")
-                
-        except Exception:
-            # Fall back to baseline approach
-            progs = synth_baseline(train_pairs)
-            attempts = predict_two_baseline(progs, test_inputs)
-        
-        # Convert outputs back to nested lists
-        return {
-            "attempt_1": [to_list(arr) for arr in attempts[0]],
-            "attempt_2": [to_list(arr) for arr in attempts[1]],
-        }
-
-    def solve_task_two_attempts(
-        self, task: Dict[str, List[Dict[str, List[List[int]]]]]
-    ) -> Tuple[List[List[List[int]]], List[List[List[int]]]]:
-        """Solve a task and ensure two diverse attempts.
-
-        Args:
-            task: ARC task specification.
-
-        Returns:
-            A tuple ``(attempt1, attempt2)`` each being a list of output grids
-            corresponding to the test inputs.
-        """
-
-        result = self.solve_task(task)
-        attempt1 = result["attempt_1"]
-        attempt2 = result["attempt_2"]
-
-        if attempt1 == attempt2:
-            alt = self._second_pass_diversified(task)
-            if alt is not None:
-                attempt2 = alt
-
-        self._last_outputs = (attempt1, attempt2)
-        return attempt1, attempt2
-
-    def _second_pass_diversified(
-        self, task: Dict[str, List[Dict[str, List[List[int]]]]]
-    ) -> Optional[List[List[List[int]]]]:
-        """Run a diversified second search pass to obtain an alternative output."""
-
-        train_pairs = [
-            (to_array(p["input"]), to_array(p["output"])) for p in task["train"]
-        ]
-        test_inputs = [to_array(p["input"]) for p in task["test"]]
-
-        try:
-            programs = synthesize_with_enhancements(train_pairs, force_alt=True)
-            attempts = predict_two_enhanced(programs, test_inputs, prefer_diverse=True)
-            return [to_list(x) for x in attempts[0]]
-        except Exception:
-            try:
-                programs = synth_baseline(train_pairs)
-                attempts = predict_two_baseline(
-                    programs, test_inputs, prefer_diverse=True
-                )
-                return [to_list(x) for x in attempts[0]]
-            except Exception:
-                return None
-
-    def best_so_far(
-        self, task: Dict[str, List[Dict[str, List[List[int]]]]]
-    ) -> List[List[List[int]]]:
-        """Return the best outputs computed so far for the current task.
-
-        If the solver has produced at least one attempt, that attempt is
-        returned. Otherwise, the identity transformation of the first test
-        input is used as a safe fallback.
-        """
-
-        if self._last_outputs is not None:
-            return self._last_outputs[0]
-        return [task["test"][0]["input"]]
-    
-    def _validate_solution(self, attempts: List[List[Array]], test_inputs: List[Array]) -> bool:
-        """Basic validation to check if solution seems reasonable."""
-        if not attempts or len(attempts) != 2:
-            return False
-        
-        for attempt in attempts:
-            if len(attempt) != len(test_inputs):
-                return False
-            
-            # Check that outputs are not just copies of inputs (unless that's valid)
-            for inp, out in zip(test_inputs, attempt):
-                if out.shape[0] == 0 or out.shape[1] == 0:  # Empty output
-                    return False
-                if np.max(out) > 9:  # Invalid color values
-                    return False
-        
-        return True
-    
-    def get_statistics(self) -> Dict[str, float]:
-        """Get solver performance statistics."""
-        success_rate = self.stats['tasks_solved'] / max(1, self.stats['total_tasks'])
-        return {
-            'success_rate': success_rate,
-            'total_tasks': self.stats['total_tasks'],
-            'tasks_solved': self.stats['tasks_solved'],
-            'fallback_usage': self.stats['fallback_used'] / max(1, self.stats['total_tasks']),
-        }
-
-
-# Global solver instance (for backwards compatibility)
-_global_solver = None
-
-
-def solve_task(task: Dict[str, List[Dict[str, List[List[int]]]]]) -> Dict[str, List[List[List[int]]]]:
-    """Solve a single ARC task (backwards compatible interface)."""
-    global _global_solver
-    
-    if _global_solver is None:
-        # Check if we should use enhancements (default: yes, unless disabled)
-        use_enhancements = os.environ.get('ARC_DISABLE_ENHANCEMENTS', '').lower() not in ('1', 'true', 'yes')
-        _global_solver = ARCSolver(use_enhancements=use_enhancements)
-    
-    return _global_solver.solve_task(task)
-
-
-def get_solver_stats() -> Dict[str, float]:
-    """Get global solver statistics."""
-    global _global_solver
-    if _global_solver is None:
-        return {}
-    return _global_solver.get_statistics()
-
-
-# Enhanced solver for direct use
-def solve_task_enhanced(task: Dict[str, List[Dict[str, List[List[int]]]]]) -> Dict[str, List[List[List[int]]]]:
-    """Solve using enhanced methods only."""
-    solver = ARCSolver(use_enhancements=True)
-    return solver.solve_task(task)
-
-
-# Baseline solver for comparison
-def solve_task_baseline(task: Dict[str, List[Dict[str, List[List[int]]]]]) -> Dict[str, List[List[List[int]]]]:
-    """Solve using baseline methods only."""
-    solver = ARCSolver(use_enhancements=False)
-    return solver.solve_task(task)
diff --git a/arc_solver/neural/__init__.py b/arc_solver/neural/__init__.py
new file mode 100644
index 0000000..4b7896f
--- /dev/null
+++ b/arc_solver/neural/__init__.py
@@ -0,0 +1,17 @@
+"""Neural components for the ARC solver."""
+
+from .guidance import SimpleClassifier, HeuristicGuidance, NeuralGuidance
+from .episodic import Episode, EpisodeDatabase, EpisodicRetrieval
+from .sketches import ProgramSketch, SketchMiner, generate_parameter_grid
+
+__all__ = [
+    "SimpleClassifier",
+    "HeuristicGuidance",
+    "NeuralGuidance",
+    "Episode",
+    "EpisodeDatabase",
+    "EpisodicRetrieval",
+    "ProgramSketch",
+    "SketchMiner",
+    "generate_parameter_grid",
+]
diff --git a/arc_solver/memory.py b/arc_solver/neural/episodic.py
similarity index 98%
rename from arc_solver/memory.py
rename to arc_solver/neural/episodic.py
index 56c492b..830825d 100644
--- a/arc_solver/memory.py
+++ b/arc_solver/neural/episodic.py
@@ -14,15 +14,15 @@
 from __future__ import annotations
 
 from dataclasses import dataclass, field
-from typing import Any, Dict, List, Tuple, Optional
+from typing import Any, Dict, List, Optional, Tuple
 from collections import defaultdict
 import json
 import os
 
 import numpy as np
 
-from .grid import Array
-from .features import compute_task_signature, extract_task_features
+from ..grid import Array
+from ..features import compute_task_signature, extract_task_features
 
 # Type alias for a DSL program representation used across the project
 Program = List[Tuple[str, Dict[str, Any]]]
diff --git a/arc_solver/guidance.py b/arc_solver/neural/guidance.py
similarity index 74%
rename from arc_solver/guidance.py
rename to arc_solver/neural/guidance.py
index ecf38b3..b1a37d8 100644
--- a/arc_solver/guidance.py
+++ b/arc_solver/neural/guidance.py
@@ -9,13 +9,14 @@
 
 from __future__ import annotations
 
-import numpy as np
-from typing import List, Tuple, Dict, Any, Optional
 import json
 import os
+from typing import Any, Dict, List, Optional, Tuple
+
+import numpy as np
 
-from .grid import Array
-from .features import extract_task_features
+from ..grid import Array
+from ..features import extract_task_features
 
 
 class SimpleClassifier:
@@ -180,13 +181,51 @@ def score_operations(self, train_pairs: List[Tuple[Array, Array]]) -> Dict[str,
         
         return scores
     
-    def load_model(self, model_path: str):
-        """Load a trained neural model."""
-        # Placeholder for model loading logic
-        # In practice, this would load weights from disk
-        pass
-    
-    def save_model(self, model_path: str):
-        """Save the trained neural model."""
-        # Placeholder for model saving logic
-        pass
+    def load_model(self, model_path: str) -> None:
+        """Load a trained neural model from ``model_path``.
+
+        The model is stored as JSON containing the network weights and
+        configuration.  Raises :class:`FileNotFoundError` if the file is missing
+        and :class:`ValueError` if the JSON is malformed or a field is missing.
+        """
+        try:
+            with open(model_path, "r", encoding="utf-8") as f:
+                data = json.load(f)
+        except FileNotFoundError as exc:  # pragma: no cover - defensive
+            raise FileNotFoundError(f"model file not found: {model_path}") from exc
+        except json.JSONDecodeError as exc:  # pragma: no cover - defensive
+            raise ValueError(f"invalid model file: {exc}") from exc
+
+        try:
+            self.neural_model = SimpleClassifier(
+                input_dim=int(data["input_dim"]),
+                hidden_dim=int(data.get("hidden_dim", 32)),
+            )
+            self.neural_model.weights1 = np.array(data["weights1"], dtype=float)
+            self.neural_model.bias1 = np.array(data["bias1"], dtype=float)
+            self.neural_model.weights2 = np.array(data["weights2"], dtype=float)
+            self.neural_model.bias2 = np.array(data["bias2"], dtype=float)
+            if "operations" in data:
+                self.neural_model.operations = list(data["operations"])
+        except KeyError as exc:  # pragma: no cover - defensive
+            raise ValueError(f"missing field in model file: {exc}") from exc
+
+    def save_model(self, model_path: str) -> None:
+        """Persist the neural model to ``model_path`` in JSON format."""
+        if self.neural_model is None:
+            raise ValueError("no neural model to save")
+
+        data = {
+            "input_dim": self.neural_model.input_dim,
+            "hidden_dim": self.neural_model.hidden_dim,
+            "weights1": self.neural_model.weights1.tolist(),
+            "bias1": self.neural_model.bias1.tolist(),
+            "weights2": self.neural_model.weights2.tolist(),
+            "bias2": self.neural_model.bias2.tolist(),
+            "operations": self.neural_model.operations,
+        }
+
+        tmp_path = f"{model_path}.tmp"
+        with open(tmp_path, "w", encoding="utf-8") as f:
+            json.dump(data, f)
+        os.replace(tmp_path, model_path)
diff --git a/arc_solver/sketches.py b/arc_solver/neural/sketches.py
similarity index 98%
rename from arc_solver/sketches.py
rename to arc_solver/neural/sketches.py
index 0e1582b..f752b13 100644
--- a/arc_solver/sketches.py
+++ b/arc_solver/neural/sketches.py
@@ -9,12 +9,12 @@
 from __future__ import annotations
 
 import numpy as np
-from typing import List, Tuple, Dict, Any, Optional
+from typing import Any, Dict, List, Optional, Tuple
 from collections import Counter, defaultdict
 import json
 
-from .grid import Array
-from .dsl import apply_program
+from ..grid import Array
+from ..dsl import apply_program
 
 
 class ProgramSketch:
diff --git a/arc_solver/solver.py b/arc_solver/solver.py
index 31472b6..68b9435 100644
--- a/arc_solver/solver.py
+++ b/arc_solver/solver.py
@@ -1,97 +1,243 @@
-"""
-Top-level solver interface for ARC tasks.
+"""Top-level solver interface for ARC tasks with neural enhancements.
 
-This module ties together the grid utilities, program synthesis search, and
-heuristics to produce solutions for ARC tasks. Given a task dictionary with
-training and test input/output pairs (in the ARC JSON format), it returns
-predicted outputs for the test inputs in the required format.
+This module wraps the enhanced search (neural guidance, episodic retrieval,
+program sketches and test-time training) in a single solver and falls back to
+the baseline search when the enhanced path fails or is disabled.
 """
 
 from __future__ import annotations
 
-from typing import Dict, List, Tuple
-
+from typing import Any, Dict, List, Optional, Tuple
 import numpy as np
-
-from .grid import to_array, to_list, Array
-from .search import synthesize, predict_two
-from .enhanced_solver import solve_task as solve_task_enhanced
 import os
 
+from .grid import to_array, to_list, Array
+from .search import (
+    synthesize as synth_baseline,
+    predict_two as predict_two_baseline,
+)
+from .enhanced_search import synthesize_with_enhancements, predict_two_enhanced
 
-def solve_task(task: Dict[str, List[Dict[str, List[List[int]]]]]) -> Dict[str, List[List[List[int]]]]:
-    """Solve a single ARC task.
 
-    The task dictionary contains 'train' and 'test' lists. Each train item has
-    'input' and 'output' grids. Each test item has 'input' and optionally
-    'output' (if known). This function synthesizes a program using the train
-    pairs and applies the best two programs to the test inputs.
-    
-    By default, uses enhanced solver with neural guidance, episodic retrieval,
-    and test-time training. Set ARC_USE_BASELINE=1 to use baseline only.
-    """
-    # Check if enhanced solving is disabled
-    use_baseline = os.environ.get('ARC_USE_BASELINE', '').lower() in ('1', 'true', 'yes')
+class ARCSolver:
+    """Enhanced ARC solver with neural components and episodic memory."""
     
-    if not use_baseline:
-        try:
-            result = solve_task_enhanced(task)
-            # If enhanced solver returns degenerate zeros, retry with baseline
-            if any(
-                arr and isinstance(arr, list) and np.all(np.array(arr) == 0)
-                for arr in result.get("attempt_1", [])
-            ):
-                raise ValueError("degenerate enhanced result")
-            return result
-        except Exception:
-            # Fall back to baseline if enhanced fails or produces invalid output
-            pass
+    def __init__(self, use_enhancements: bool = True,
+                 guidance_model_path: str = None,
+                 episode_db_path: str = "episodes.json"):
+        self.use_enhancements = use_enhancements
+        self.guidance_model_path = guidance_model_path
+        self.episode_db_path = episode_db_path
+        self.stats = {
+            'tasks_solved': 0,
+            'total_tasks': 0,
+            'enhancement_success_rate': 0.0,
+            'fallback_used': 0,
+        }
+        self._last_outputs: Optional[Tuple[List[List[List[int]]], List[List[List[int]]]]] = None
     
-    # Baseline implementation
-    # Extract training pairs as numpy arrays
-    train_pairs: List[Tuple[Array, Array]] = []
-    for pair in task["train"]:
+    def solve_task(self, task: Dict[str, List[Dict[str, List[List[int]]]]]) -> Dict[str, List[List[List[int]]]]:
+        """Solve a single ARC task using enhanced or baseline methods."""
+        self.stats['total_tasks'] += 1
+        
+        # Extract training pairs as numpy arrays, skipping malformed ones
+        train_pairs: List[Tuple[Array, Array]] = []
+        for pair in task.get("train", []):
+            try:
+                a = to_array(pair["input"])
+                b = to_array(pair["output"])
+            except Exception:
+                continue
+            train_pairs.append((a, b))
+
+        # Extract test inputs with graceful degradation
+        test_inputs: List[Array] = []
+        for pair in task.get("test", []):
+            try:
+                test_inputs.append(to_array(pair["input"]))
+            except Exception:
+                test_inputs.append(np.zeros((1, 1), dtype=np.int16))
+
+        if not train_pairs:
+            return {
+                "attempt_1": [to_list(arr) for arr in test_inputs],
+                "attempt_2": [to_list(arr) for arr in test_inputs],
+            }
+        
+        # Try enhanced synthesis first, fall back to baseline if needed
         try:
-            a = to_array(pair["input"])
-            b = to_array(pair["output"])
+            if self.use_enhancements:
+                progs = synthesize_with_enhancements(train_pairs)
+                attempts = predict_two_enhanced(progs, test_inputs)
+                
+                # Check if we got a reasonable solution
+                if self._validate_solution(attempts, test_inputs):
+                    if any(np.all(out == 0) for out in attempts[0]):
+                        self.stats['fallback_used'] += 1
+                        raise Exception("Enhanced search produced degenerate output")
+                    self.stats['tasks_solved'] += 1
+                    return {
+                        "attempt_1": [to_list(arr) for arr in attempts[0]],
+                        "attempt_2": [to_list(arr) for arr in attempts[1]],
+                    }
+                else:
+                    # Enhancement didn't work, try fallback
+                    self.stats['fallback_used'] += 1
+                    raise Exception("Enhanced search failed validation")
+            else:
+                raise Exception("Enhancements disabled")
+                
         except Exception:
-            # Skip malformed training examples
-            continue
-        train_pairs.append((a, b))
+            # Fall back to baseline approach
+            progs = synth_baseline(train_pairs)
+            attempts = predict_two_baseline(progs, test_inputs)
+
+            # Sanity check predictions and fall back to identity if needed
+            fixed_attempts: List[List[Array]] = [[], []]
+            for idx, pred in enumerate(attempts[0]):
+                if pred is None or pred.size == 0 or np.all(pred == 0):
+                    fixed_attempts[0].append(test_inputs[idx])
+                else:
+                    fixed_attempts[0].append(pred)
+            fixed_attempts[1] = attempts[1] if len(attempts) > 1 else fixed_attempts[0]
+
+            return {
+                "attempt_1": [to_list(arr) for arr in fixed_attempts[0]],
+                "attempt_2": [to_list(arr) for arr in fixed_attempts[1]],
+            }
+
+    def solve_task_two_attempts(
+        self, task: Dict[str, List[Dict[str, List[List[int]]]]]
+    ) -> Tuple[List[List[List[int]]], List[List[List[int]]]]:
+        """Solve a task and try to produce two distinct attempts.
+
+        Args:
+            task: ARC task specification.
+
+        Returns:
+            A tuple ``(attempt1, attempt2)`` each being a list of output grids
+            corresponding to the test inputs.
+        """
+
+        result = self.solve_task(task)
+        attempt1 = result["attempt_1"]
+        attempt2 = result["attempt_2"]
+
+        if attempt1 == attempt2:
+            alt = self._second_pass_diversified(task)
+            if alt is not None:
+                attempt2 = alt
+
+        self._last_outputs = (attempt1, attempt2)
+        return attempt1, attempt2
+
+    def _second_pass_diversified(
+        self, task: Dict[str, List[Dict[str, List[List[int]]]]]
+    ) -> Optional[List[List[List[int]]]]:
+        """Run a diversified second search pass to obtain an alternative output."""
+
+        train_pairs = [
+            (to_array(p["input"]), to_array(p["output"])) for p in task["train"]
+        ]
+        test_inputs = [to_array(p["input"]) for p in task["test"]]
 
-    # Extract test inputs with graceful degradation
-    test_inputs: List[Array] = []
-    for pair in task["test"]:
         try:
-            test_inputs.append(to_array(pair["input"]))
+            programs = synthesize_with_enhancements(train_pairs, force_alt=True)
+            attempts = predict_two_enhanced(programs, test_inputs, prefer_diverse=True)
+            return [to_list(x) for x in attempts[0]]
         except Exception:
-            test_inputs.append(np.zeros((1, 1), dtype=np.int16))
+            try:
+                programs = synth_baseline(train_pairs)
+                attempts = predict_two_baseline(
+                    programs, test_inputs, prefer_diverse=True
+                )
+                return [to_list(x) for x in attempts[0]]
+            except Exception:
+                return None
+
+    def best_so_far(
+        self, task: Dict[str, List[Dict[str, List[List[int]]]]]
+    ) -> List[List[List[int]]]:
+        """Return the best outputs computed so far for the current task.
+
+        If the solver has produced at least one attempt, that attempt is
+        returned. Otherwise, the identity transformation of the first test
+        input is used as a safe fallback.
+        """
 
-    if not train_pairs:
-        # Without training data we can only echo the inputs
+        if self._last_outputs is not None:
+            return self._last_outputs[0]
+        return [task["test"][0]["input"]]
+    
+    def _validate_solution(self, attempts: List[List[Array]], test_inputs: List[Array]) -> bool:
+        """Basic validation to check if solution seems reasonable."""
+        if not attempts or len(attempts) != 2:
+            return False
+        
+        for attempt in attempts:
+            if len(attempt) != len(test_inputs):
+                return False
+            
+            # Reject empty outputs and color values above 9
+            for inp, out in zip(test_inputs, attempt):
+                if out.shape[0] == 0 or out.shape[1] == 0:  # Empty output
+                    return False
+                if np.max(out) > 9:  # Invalid color values
+                    return False
+        
+        return True
+    
+    def get_statistics(self) -> Dict[str, float]:
+        """Get solver performance statistics."""
+        success_rate = self.stats['tasks_solved'] / max(1, self.stats['total_tasks'])
         return {
-            "attempt_1": [to_list(arr) for arr in test_inputs],
-            "attempt_2": [to_list(arr) for arr in test_inputs],
+            'success_rate': success_rate,
+            'total_tasks': self.stats['total_tasks'],
+            'tasks_solved': self.stats['tasks_solved'],
+            'fallback_usage': self.stats['fallback_used'] / max(1, self.stats['total_tasks']),
         }
 
-    # Synthesize candidate programs and predict outputs
-    progs = synthesize(train_pairs)
-    if not progs:
-        attempts = [test_inputs, test_inputs]
-    else:
-        attempts = predict_two(progs, test_inputs)
-        # Basic sanity fallback: if predictions look degenerate, use identity
-        fixed_attempts: List[List[Array]] = [[], []]
-        for idx, pred in enumerate(attempts[0]):
-            if pred is None or pred.size == 0 or np.all(pred == 0):
-                fixed_attempts[0].append(test_inputs[idx])
-            else:
-                fixed_attempts[0].append(pred)
-        fixed_attempts[1] = attempts[1] if len(attempts) > 1 else fixed_attempts[0]
-        attempts = fixed_attempts
-
-    # Convert outputs back to nested lists
-    return {
-        "attempt_1": [to_list(arr) for arr in attempts[0]],
-        "attempt_2": [to_list(arr) for arr in attempts[1]],
-    }
\ No newline at end of file
+
+# Global solver instance (for backwards compatibility)
+_global_solver = None
+
+
+def solve_task(task: Dict[str, List[Dict[str, List[List[int]]]]]) -> Dict[str, List[List[List[int]]]]:
+    """Solve a single ARC task (backwards compatible interface)."""
+    global _global_solver
+    
+    if _global_solver is None:
+        # Determine whether to enable enhancements. Baseline can be forced via
+        # ``ARC_USE_BASELINE`` or by explicitly disabling enhancements.
+        use_baseline = os.environ.get('ARC_USE_BASELINE', '').lower() in (
+            '1', 'true', 'yes'
+        )
+        enhancements_disabled = os.environ.get('ARC_DISABLE_ENHANCEMENTS', '').lower() in (
+            '1', 'true', 'yes'
+        )
+        use_enhancements = not use_baseline and not enhancements_disabled
+        _global_solver = ARCSolver(use_enhancements=use_enhancements)
+    
+    return _global_solver.solve_task(task)
+
+
+def get_solver_stats() -> Dict[str, float]:
+    """Get global solver statistics."""
+    global _global_solver
+    if _global_solver is None:
+        return {}
+    return _global_solver.get_statistics()
+
+
+# Enhanced solver for direct use
+def solve_task_enhanced(task: Dict[str, List[Dict[str, List[List[int]]]]]) -> Dict[str, List[List[List[int]]]]:
+    """Solve with enhancements enabled; falls back to baseline if they fail."""
+    solver = ARCSolver(use_enhancements=True)
+    return solver.solve_task(task)
+
+
+# Baseline solver for comparison
+def solve_task_baseline(task: Dict[str, List[Dict[str, List[List[int]]]]]) -> Dict[str, List[List[List[int]]]]:
+    """Solve using baseline methods only."""
+    solver = ARCSolver(use_enhancements=False)
+    return solver.solve_task(task)
diff --git a/arc_submit.py b/arc_submit.py
index 9c4336f..4ab2760 100644
--- a/arc_submit.py
+++ b/arc_submit.py
@@ -19,7 +19,7 @@
 
 import numpy as np
 
-from arc_solver.enhanced_solver import ARCSolver
+from arc_solver.solver import ARCSolver
 from arc_solver.io_utils import load_rerun_json, save_submission
 
 
diff --git a/docs/architecture.md b/docs/architecture.md
index 836450b..9461981 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -6,11 +6,11 @@ PUMA (Program Understanding and Meta-learning for ARC) is a neuroscience-inspire
 
 ### Neuroscience-Inspired Components
 
-1. **Multiple-Demand (MD) Network Analog**: The neural guidance system (`arc_solver/guidance.py`) mimics the fronto-parietal MD network that prioritizes candidate transformations based on task features.
+1. **Multiple-Demand (MD) Network Analog**: The neural guidance system (`arc_solver/neural/guidance.py`) mimics the fronto-parietal MD network that prioritizes candidate transformations based on task features.
 
 2. **Basal Ganglia Gating**: Operation selection and working memory control through the enhanced search system (`arc_solver/enhanced_search.py`) that gates which programs enter the beam search.
 
-3. **Hippocampal-mPFC Loop**: Episodic retrieval (`arc_solver/memory.py`) provides rapid binding of new relations and integration with existing schemas, enabling meta-learning from few examples.
+3. **Hippocampal-mPFC Loop**: Episodic retrieval (`arc_solver/neural/episodic.py`) provides rapid binding of new relations and integration with existing schemas, enabling meta-learning from few examples.
 
 4. **Test-Time Adaptation**: The TTT system (`arc_solver/ttt.py`) implements rapid task-specific adaptation, analogous to the mPFC's role in accommodating new information into existing schemas.
 
@@ -26,10 +26,11 @@ PUMA/
 │   ├── search.py                 # Basic search algorithms
 │   ├── solver.py                 # Main solver interface
 │   ├── features.py               # Task feature extraction
-│   ├── guidance.py               # Neural guidance (MD network analog)
-│   ├── sketches.py               # Program sketch mining
-│   ├── memory.py                 # Episodic retrieval (hippocampus analog)
-│   └── ttt.py                    # Test-time training (mPFC analog)
+│   ├── ttt.py                    # Test-time training (mPFC analog)
+│   └── neural/                   # Neural components
+│       ├── guidance.py           # Neural guidance (MD network analog)
+│       ├── sketches.py           # Program sketch mining
+│       └── episodic.py           # Episodic retrieval (hippocampus analog)
 ├── tools/                        # Training and analysis tools
 └── tests/                        # Comprehensive test suite
 ```
diff --git a/tests/test_guidance.py b/tests/test_guidance.py
index 229849a..bfad494 100644
--- a/tests/test_guidance.py
+++ b/tests/test_guidance.py
@@ -15,7 +15,11 @@
 
 from arc_solver.grid import to_array
 from arc_solver.features import extract_task_features
-from arc_solver.guidance import NeuralGuidance, SimpleClassifier, HeuristicGuidance
+from arc_solver.neural.guidance import (
+    NeuralGuidance,
+    SimpleClassifier,
+    HeuristicGuidance,
+)
 
 
 class TestNeuralGuidance:
diff --git a/tests/test_memory.py b/tests/test_memory.py
index a1bc63f..5f654b4 100644
--- a/tests/test_memory.py
+++ b/tests/test_memory.py
@@ -16,7 +16,7 @@
 sys.path.append(str(Path(__file__).parent.parent))
 
 from arc_solver.grid import to_array
-from arc_solver.memory import EpisodicRetrieval, EpisodeDatabase, Episode
+from arc_solver.neural.episodic import EpisodicRetrieval, EpisodeDatabase, Episode
 from arc_solver.features import compute_task_signature
 
 
diff --git a/tools/benchmark.py b/tools/benchmark.py
index 7b19521..f687fc4 100644
--- a/tools/benchmark.py
+++ b/tools/benchmark.py
@@ -14,7 +14,7 @@
 from pathlib import Path
 import os
 
-from arc_solver.enhanced_solver import ARCSolver, solve_task_enhanced, solve_task_baseline
+from arc_solver.solver import ARCSolver, solve_task_enhanced, solve_task_baseline
 from arc_solver.neural.episodic import EpisodicRetrieval
 from arc_solver.neural.sketches import SketchMiner
 
diff --git a/tools/build_memory.py b/tools/build_memory.py
index 24c6e0f..9ad3982 100644
--- a/tools/build_memory.py
+++ b/tools/build_memory.py
@@ -17,7 +17,7 @@
 
 from arc_solver.grid import to_array
 from arc_solver.features import compute_task_signature
-from arc_solver.memory import EpisodicRetrieval
+from arc_solver.neural.episodic import EpisodicRetrieval
 from arc_solver.heuristics import consistent_program_single_step, score_candidate
 
 
diff --git a/tools/mine_sketches.py b/tools/mine_sketches.py
index f53a2ab..867932e 100644
--- a/tools/mine_sketches.py
+++ b/tools/mine_sketches.py
@@ -17,7 +17,7 @@
 
 from arc_solver.grid import to_array
 from arc_solver.features import extract_task_features
-from arc_solver.sketches import SketchMiner, ProgramSketch
+from arc_solver.neural.sketches import SketchMiner, ProgramSketch
 from arc_solver.heuristics import consistent_program_single_step
 
 
diff --git a/tools/train_guidance.py b/tools/train_guidance.py
index 9e63b2e..f05567d 100644
--- a/tools/train_guidance.py
+++ b/tools/train_guidance.py
@@ -18,7 +18,7 @@
 
 from arc_solver.grid import to_array
 from arc_solver.features import extract_task_features
-from arc_solver.guidance import SimpleClassifier
+from arc_solver.neural.guidance import SimpleClassifier
 
 
 def load_training_data(challenges_path: str, solutions_path: str = None) -> List[Dict[str, Any]]:
diff --git a/tools/verify_layout.py b/tools/verify_layout.py
index 97b1ec2..4dde327 100644
--- a/tools/verify_layout.py
+++ b/tools/verify_layout.py
@@ -5,8 +5,9 @@
   "arc_solver/__init__.py",
   "arc_solver/dsl.py", "arc_solver/grid.py", "arc_solver/heuristics.py", "arc_solver/objects.py",
   "arc_solver/search.py", "arc_solver/solver.py", "arc_solver/io_utils.py",
-  "arc_solver/features.py", "arc_solver/guidance.py", "arc_solver/sketches.py",
-  "arc_solver/memory.py", "arc_solver/ttt.py",
+  "arc_solver/features.py", "arc_solver/neural/guidance.py",
+  "arc_solver/neural/sketches.py", "arc_solver/neural/episodic.py",
+  "arc_solver/ttt.py",
   "tools/train_guidance.py", "tools/mine_sketches.py", "tools/build_memory.py", "tools/benchmark.py",
   "models", "data", "notebooks", "scripts", "submission",
   "arc_submit.py", "requirements.txt",