DocGarbanzo
diff --git a/‎CLAUDE.md‎
Lines changed: 51 additions & 12 deletions b/‎CLAUDE.md‎
Lines changed: 51 additions & 12 deletions
diff --git a/‎donkeycar/parts/tub_statistics.py‎
Lines changed: 90 additions & 38 deletions b/‎donkeycar/parts/tub_statistics.py‎
Lines changed: 90 additions & 38 deletions
diff --git a/‎donkeycar/pipeline/training.py‎
Lines changed: 11 additions & 0 deletions b/‎donkeycar/pipeline/training.py‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎donkeycar/pipeline/types.py‎
Lines changed: 38 additions & 7 deletions b/‎donkeycar/pipeline/types.py‎
Lines changed: 38 additions & 7 deletions
@@ -473,9 +473,10 @@ Visualizes recorded vehicle trajectories, detects laps, computes mean reference
 courses, and segments courses into geometric features.
 
 **Segment stats:** For Tub data, imupath computes segment performance on the
-fly using `FIELD_AGGREGATIONS` and `LAP_SORTING_CRITERIA`. It loads
-`./config.py` by default; pass `--config` to use another config and include
-custom tub fields in the Segment Stats selector.
+fly using `FIELD_AGGREGATIONS` (the single source of truth for both field
+aggregation and ranking). It loads `./config.py` by default; pass `--config`
+to use another config and include custom tub fields in the Segment Stats
+selector.
 Web UI segment stats use TubStatistics session rankings from manifest
 metadata, so `donkey segment` must have stored segmentation data for the
 session.
@@ -602,12 +603,18 @@ lap" that outperforms any single recorded lap.
 **Data Structure:**
 - Segment assignments stored in manifest metadata (NOT in catalog records)
 - Metadata stores: segmentation parameters, segment boundaries, rankings
-- Performance rankings: `session_rank[session_id][lap_num][segment_id] =
-  [time_pct, gyro_z_pct, distance_pct]`
+- Performance rankings computed from FIELD_AGGREGATIONS:
+  `session_rank[session_id][lap_num][segment_id] = {output_key: pct, ...}`
+- The `lap_pct` vector passed to training matches FIELD_AGGREGATIONS order:
+  `[time_pct, distance_pct, gyro_z_pct, ...]`
 
 **IMPORTANT:** See "Tub Data Integrity" section - segment data is computed at
 training time from manifest metadata, NOT stored in individual records.
 
+**Single source of truth:** `FIELD_AGGREGATIONS` defines both what gets
+aggregated AND how laps/segments are ranked. The order of entries determines
+ranking priority (first entry is primary sort key).
+
 **Example:** 3 laps, 4 segments per lap
 
 Lap 1: Segments [Fast, Slow, Medium, Fast]
@@ -625,17 +632,49 @@ segment!
 
 ### Configuration
 
-`donkeycar/templates/cfg_complete.py`:
+**Primary config:** `donkeycar/templates/cfg_donkey5.py` (cfg_complete.py uses
+deprecated LAP_SORTING_CRITERIA for backward compatibility)
 
 ```python
-#SEGMENT PERFORMANCE
-SEGMENT_PCT_MODE = False  # True = segment-based, False = lap-based
-SEGMENT_STRATEGY = 'hybrid'  # threshold, extrema, gradient, or hybrid
-SEGMENT_LAP_DETECTOR = 'ycrossing'  # ycrossing or drift
-SEGMENT_MIN_LENGTH = 1.0  # Minimum segment length in meters
-SEGMENT_CURVATURE_THRESHOLD = 0.1  # Curvature threshold for segmentation
+# Enable segment-based training
+SEGMENT_PCT_MODE = True  # True = segment-based, False = lap-based
+
+# FIELD_AGGREGATIONS: Single source of truth for:
+# 1. Which fields to aggregate per lap/segment
+# 2. How to rank laps/segments (order matters!)
+# 3. What goes into the lap_pct vector for training
+
+def abs_transform(value):
+    return abs(value)
+
+FIELD_AGGREGATIONS = [
+    # Primary ranking: lap/segment time
+    {'output_key': 'time'},        # Boundary field (no 'field' key)
+    # Secondary ranking: distance
+    {'output_key': 'distance'},    # Boundary field
+    # Tertiary ranking: smoothness via gyro Z-axis
+    {
+        'field': 'car/gyro',       # Record field
+        'index': 2,
+        'output_key': 'gyro_z_agg',
+        'transform': abs_transform,
+        'aggregation': 'avg'
+    }
+]
+
+# To train using ONLY time and distance (no behavioral metrics):
+# FIELD_AGGREGATIONS = [
+#     {'output_key': 'time'},
+#     {'output_key': 'distance'}
+# ]
 ```
 
+**Field types:**
+- **Boundary fields**: Computed from lap/segment timing (time, distance).
+  No 'field' key.
+- **Record fields**: Extracted from tub records (gyro, accel, speed). Have
+  'field' key.
+
 ### Iterative Training Strategy
 
 1. Train with segment_pct on initial multi-lap data
 
@@ -125,15 +125,32 @@ def accumulate_fields(self, record):
 
 @dataclass
 class FieldAggregationSpec:
-    """Specification for aggregating a field across lap/segment."""
-    field: str                           # e.g., 'car/gyro'
-    output_key: str                      # e.g., 'gyro_z_agg'
+    """
+    Specification for aggregating a field across lap/segment.
+
+    Two types of fields:
+    1. Boundary fields: computed from lap/segment boundaries (time, distance)
+       - 'field' is left unset (field=None)
+       - Computed in _finalize_segment_instance()
+    2. Record fields: extracted from individual records (gyro, accel, etc.)
+       - 'field' names the record key (e.g., 'car/gyro')
+       - Aggregated across records in _aggregate_single_field()
+    """
+    output_key: str                      # e.g., 'gyro_z_agg' or 'time'
+    field: Optional[str] = None          # e.g., 'car/gyro' (None for boundary)
     index: Optional[int] = None          # Vector index (None for scalars)
     transform: Optional[Callable] = None # Transform function
     aggregation: str = 'avg'             # avg, sum, min, max, median, delta
+    reverse: bool = False                # For sorting: True = descending
+
+    def is_boundary_field(self) -> bool:
+        """Check if this is a boundary field (time/distance)."""
+        return self.field is None
 
     def extract(self, record: dict) -> Optional[float]:
         """Extract and transform value from record."""
+        if self.is_boundary_field():
+            return None  # Boundary fields are not extracted from records
         try:
             value = record[self.field]
             if self.index is not None:
@@ -193,9 +210,10 @@ def __init__(self,
         Construct tub statistics calculator for tub
 
         :param tub:                 input tub
-        :param config:              Config object (loads FIELD_AGGREGATIONS,
-                                    LAP_SORTING_CRITERIA). Required if
-                                    field_aggregations not provided.
+        :param config:              Config object (loads FIELD_AGGREGATIONS).
+                                    FIELD_AGGREGATIONS is the single source of
+                                    truth for both aggregation and ranking.
+                                    Required if field_aggregations not provided.
         :param sorting_strategy:    Optional custom sorting strategy
         :param field_aggregations:  Optional list of FieldAggregationSpec or
                                     dicts. Required if config not provided.
@@ -233,7 +251,8 @@ def _normalize_field_aggregations(self, field_aggregations: List) -> List[
                                          FieldAggregationSpec]:
         """Convert dict specs to FieldAggregationSpec.
 
-        :raises ValueError: If old-style extractor syntax is used.
+        :raises ValueError: If old-style extractor syntax is used or a spec
+                           dict lacks the required "output_key".
         """
         normalized = []
         for spec in field_aggregations:
@@ -245,12 +264,17 @@ def _normalize_field_aggregations(self, field_aggregations: List) -> List[
                         f'Old-style field_aggregations with "extractor" '
                         f'not supported for field {spec.get("field", "?")}. '
                         f'Use "index" parameter instead.')
+                if 'output_key' not in spec:
+                    raise ValueError(
+                        f'Field aggregation missing required "output_key": '
+                        f'{spec}')
                 normalized.append(FieldAggregationSpec(
-                    field=spec['field'],
                     output_key=spec['output_key'],
+                    field=spec.get('field'),  # None for boundary fields
                     index=spec.get('index'),
                     transform=spec.get('transform'),
-                    aggregation=spec.get('aggregation', 'avg')
+                    aggregation=spec.get('aggregation', 'avg'),
+                    reverse=spec.get('reverse', False)
                 ))
         return normalized
 
@@ -266,37 +290,60 @@ def _load_field_aggregations_from_config(self, config) -> List[
             raise ValueError(
                 'FIELD_AGGREGATIONS not found in config. '
                 'Please define FIELD_AGGREGATIONS in your config file. '
-                'Example: FIELD_AGGREGATIONS = [{"field": "car/gyro", '
-                '"output_key": "gyro_z_agg", "index": 1, "aggregation": "avg"}]')
-
-        # Convert config dicts to FieldAggregationSpec
-        specs = []
-        for spec_dict in config_specs:
-            spec = FieldAggregationSpec(
-                field=spec_dict['field'],
-                output_key=spec_dict['output_key'],
-                index=spec_dict.get('index'),
-                transform=spec_dict.get('transform'),
-                aggregation=spec_dict.get('aggregation', 'avg')
-            )
-            specs.append(spec)
-            logger.info(f'Loaded field aggregation: {spec.output_key} from '
-                       f'{spec.field}[{spec.index}] using {spec.aggregation}')
+                'Example: FIELD_AGGREGATIONS = [\n'
+                '  {"output_key": "time"},  # Boundary field\n'
+                '  {"output_key": "distance"},\n'
+                '  {"field": "car/gyro", "output_key": "gyro_z_agg", '
+                '"index": 2, "aggregation": "avg"}\n'
+                ']')
+
+        # Use normalization method for consistency
+        specs = self._normalize_field_aggregations(config_specs)
+
+        for spec in specs:
+            if spec.is_boundary_field():
+                logger.info(f'Loaded boundary field: {spec.output_key}')
+            else:
+                logger.info(
+                    f'Loaded field aggregation: {spec.output_key} from '
+                    f'{spec.field}[{spec.index}] using {spec.aggregation}')
 
         return specs
 
     def _load_sorting_strategy_from_config(self, config) -> SortingStrategy:
-        """Load sorting strategy from config."""
-        criteria = getattr(config, 'LAP_SORTING_CRITERIA', None)
-        if criteria:
-            logger.info(f'Loaded sorting criteria from config: '
-                       f'{[c["key"] for c in criteria]}')
+        """
+        Load sorting strategy from config.
+
+        A deprecated LAP_SORTING_CRITERIA, if set, takes precedence (for
+        backward compatibility); otherwise FIELD_AGGREGATIONS order is used.
+        """
+        # Check for deprecated LAP_SORTING_CRITERIA
+        old_criteria = getattr(config, 'LAP_SORTING_CRITERIA', None)
+        if old_criteria:
+            logger.warning(
+                'LAP_SORTING_CRITERIA is DEPRECATED. '
+                'Use FIELD_AGGREGATIONS instead as the single source of truth. '
+                'Add time/distance as boundary fields: '
+                '{"output_key": "time"}, {"output_key": "distance"}')
+            return SortingStrategy(old_criteria)
+
+        # Build strategy from FIELD_AGGREGATIONS
+        if self.field_aggregations:
+            criteria = []
+            for spec in self.field_aggregations:
+                criteria.append({
+                    'key': spec.output_key,
+                    'transform': spec.transform or (lambda x: x),
+                    'reverse': spec.reverse
+                })
+            logger.info(
+                f'Built sorting strategy from FIELD_AGGREGATIONS: '
+                f'{[c["key"] for c in criteria]}')
             return SortingStrategy(criteria)
-        else:
-            logger.info('No LAP_SORTING_CRITERIA in config, using minimal '
-                       'defaults (time, distance). Configure LAP_SORTING_CRITERIA '
-                       'in config to include custom fields like gyro_z_agg.')
-            return default_lap_sorting_strategy()
+
+        # Should never reach here due to validation in __init__
+        logger.error('No field aggregations available for sorting strategy')
+        return default_lap_sorting_strategy()
 
     def generate_laptimes_from_records(self, overwrite=False):
 
@@ -803,11 +850,16 @@ def _calculate_aggregated_fields(self):
 
         Generic implementation that handles any field with custom
         extractor and transform functions.
+
+        Boundary fields (time, distance) are skipped here - they're
+        computed in _finalize_segment_instance().
         """
-        logger.info(f'Calculating {len(self.field_aggregations)} field '
-                    f'aggregations in tub {self.tub.base_path}')
+        record_fields = [spec for spec in self.field_aggregations
+                        if not spec.is_boundary_field()]
+        logger.info(f'Calculating {len(record_fields)} field aggregations '
+                    f'from records in tub {self.tub.base_path}')
 
-        for field_spec in self.field_aggregations:
+        for field_spec in record_fields:
             self._aggregate_single_field(field_spec)
 
     def _aggregate_single_field(self, spec: FieldAggregationSpec):
 
@@ -144,9 +144,20 @@ def train(cfg: Config, tub_paths: str, model: str = None,
     elif add_lap_pct or getattr(cfg, 'LAP_QUANTIFIER', None) is not None:
         pct_mode = PctMode.LAP
 
+    # Extract ranking keys from FIELD_AGGREGATIONS (single source of truth)
+    ranking_keys = None
+    if hasattr(cfg, 'FIELD_AGGREGATIONS') and cfg.FIELD_AGGREGATIONS:
+        ranking_keys = [spec['output_key'] for spec in cfg.FIELD_AGGREGATIONS]
+        logger.info(f'Extracted ranking keys from FIELD_AGGREGATIONS: '
+                    f'{ranking_keys}')
+    else:
+        logger.warning('No FIELD_AGGREGATIONS in config - lap_pct will not be '
+                      'populated. Define FIELD_AGGREGATIONS in your config.')
+
     dataset = TubDataset(config=cfg, tub_paths=all_tub_paths,
                          seq_size=kl.seq_size(),
                          add_lap_pct=add_lap_pct,
+                         ranking_keys=ranking_keys,
                          pct_mode=pct_mode)
     train_records, val_records \
         = train_test_split(dataset.get_records(), shuffle=True,
 
@@ -188,10 +188,10 @@ def extend(self, session_lap_rank, ranking_keys=None,
                                 For LAP mode: {session_id: {lap: rankings}}
                                 For SEGMENT mode: {session_id: {lap: {segment:
                                 rankings}}}
-        :param ranking_keys: Optional list of keys to extract for lap_pct.
-                           If None, uses default ('time', 'distance',
-                           'gyro_z_agg')
-                           for backward compatibility.
+        :param ranking_keys: List of keys to extract for lap_pct. Should match
+                           output_key values from FIELD_AGGREGATIONS in config.
+                           If None, keys are read from this record's own
+                           ranking dict (backward compatibility).
         :param pct_mode: Performance mode (NONE, LAP, or SEGMENT)
         :param segment_id: Pre-computed segment ID (for SEGMENT mode).
                           If provided, uses instead of reading record.
@@ -202,9 +202,12 @@ def extend(self, session_lap_rank, ranking_keys=None,
         session_id = self.underlying['_session_id']
         lap_i = self.underlying.get('car/lap', 0)
 
-        # Use default keys for backward compatibility
-        if ranking_keys is None:
-            ranking_keys = ('time', 'distance', 'gyro_z_agg')
+        # Auto-extract ranking_keys if not provided (backward compatibility)
+        if ranking_keys is None and session_lap_rank:
+            ranking_keys = self._extract_ranking_keys(session_lap_rank,
+                                                      pct_mode, segment_id)
+            if ranking_keys is None:
+                return False  # Couldn't determine keys
 
         if pct_mode == PctMode.SEGMENT:
             # Use passed segment_id if provided, otherwise read from record
@@ -237,6 +240,34 @@ def extend(self, session_lap_rank, ranking_keys=None,
 
             return False  # Couldn't populate lap_pct, exclude from training
 
+    def _extract_ranking_keys(self, session_lap_rank, pct_mode, segment_id):
+        """
+        Extract ranking keys from session_lap_rank dict (backward compat).
+
+        :return: List of ranking keys or None if can't be determined
+        """
+        session_id = self.underlying['_session_id']
+        lap_i = self.underlying.get('car/lap', 0)
+
+        try:
+            if pct_mode == PctMode.SEGMENT:
+                # Try to get segment rankings
+                if segment_id is None:
+                    segment_id = self.underlying.get('car/segment')
+                if segment_id is None:
+                    return None
+                lap_dict = session_lap_rank.get(session_id, {}).get(lap_i)
+                if lap_dict and segment_id in lap_dict:
+                    return list(lap_dict[segment_id].keys())
+            else:
+                # LAP mode or fallback
+                lap_i_dict = session_lap_rank.get(session_id, {}).get(lap_i)
+                if lap_i_dict and isinstance(lap_i_dict, dict):
+                    return list(lap_i_dict.keys())
+        except (KeyError, TypeError, AttributeError):
+            pass
+        return None
+
     def __repr__(self) -> str:
         return repr(self.underlying)