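Fix time-range dtype handling in timeseries segmenter sample collection; drop parsed_label_config/train_output kwargs in tests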

makseq · makseq · commit 3620bfe998ec · 2025-06-07T02:57:10.000+01:00
diff --git a/label_studio_ml/examples/timeseries_segmenter/model.py b/label_studio_ml/examples/timeseries_segmenter/model.py
@@ -269,15 +269,36 @@ def _collect_samples(
                     end = r["value"]["end"]
                     label = r["value"]["timeserieslabels"][0]
                     
+                    # Convert start/end to same type as time column for comparison
+                    time_dtype = df[params["time_col"]].dtype
+                    logger.debug(f"Task {task_id}: Converting time range [{start}, {end}] to match column dtype {time_dtype}")
+                    try:
+                        if 'int' in str(time_dtype):
+                            start = int(float(start))
+                            end = int(float(end))
+                        elif 'float' in str(time_dtype):
+                            start = float(start)
+                            end = float(end)
+                        # For string/datetime, keep as is
+                        logger.debug(f"Task {task_id}: Converted to [{start}, {end}]")
+                    except (ValueError, TypeError) as e:
+                        logger.warning(f"Could not convert start={start}, end={end} to {time_dtype}: {e}, using original values")
+                    
                     # Find rows in this time range
-                    mask = (df[params["time_col"]] >= start) & (
-                        df[params["time_col"]] <= end
-                    )
+                    try:
+                        mask = (df[params["time_col"]] >= start) & (
+                            df[params["time_col"]] <= end
+                        )
+                    except TypeError as e:
+                        logger.error(f"Task {task_id}: Type error comparing times - start={start} ({type(start)}), end={end} ({type(end)}), time_col dtype={time_dtype}: {e}")
+                        # Skip this annotation if we can't compare
+                        continue
                     
                     # Set the appropriate label index
                     label_idx = label2idx[label]
                     row_labels[mask] = label_idx
                     labeled_rows += mask.sum()
+                    logger.debug(f"Task {task_id}: Labeled {mask.sum()} rows with '{label}' (index {label_idx})")
             
             # Add ALL rows to training data
             X_list.append(df[params["channels"]].values.astype(np.float32))
diff --git a/label_studio_ml/examples/timeseries_segmenter/tests/test_segmenter.py b/label_studio_ml/examples/timeseries_segmenter/tests/test_segmenter.py
@@ -74,9 +74,7 @@ def segmenter_instance(temp_model_dir):
     logger.info("Creating TimeSeriesSegmenter instance for testing")
     with patch.dict(os.environ, {'MODEL_DIR': temp_model_dir, 'TRAIN_EPOCHS': '10', 'SEQUENCE_SIZE': '10'}):
         segmenter = TimeSeriesSegmenter(
-            label_config=LABEL_CONFIG,
-            parsed_label_config={},
-            train_output={}
+            label_config=LABEL_CONFIG
         )
         segmenter.setup()
         logger.info("TimeSeriesSegmenter instance created and set up")
@@ -472,9 +470,7 @@ def test_model_parameters_configuration(self, temp_model_dir):
             logger.info(f"Testing configuration {i+1}/{len(configs)}: {config}")
             with patch.dict(os.environ, {**config, 'MODEL_DIR': temp_model_dir}):
                 segmenter = TimeSeriesSegmenter(
-                    label_config=LABEL_CONFIG,
-                    parsed_label_config={},
-                    train_output={}
+                    label_config=LABEL_CONFIG
                 )
                 segmenter.setup()