graphistry
diff --git a/‎graphistry/compute/gfql/df_executor.py‎
Lines changed: 81 additions & 52 deletions b/‎graphistry/compute/gfql/df_executor.py‎
Lines changed: 81 additions & 52 deletions
diff --git a/‎graphistry/compute/gfql/same_path/bfs.py‎
Lines changed: 25 additions & 20 deletions b/‎graphistry/compute/gfql/same_path/bfs.py‎
Lines changed: 25 additions & 20 deletions
diff --git a/‎graphistry/compute/gfql/same_path/df_utils.py‎
Lines changed: 94 additions & 9 deletions b/‎graphistry/compute/gfql/same_path/df_utils.py‎
Lines changed: 94 additions & 9 deletions
diff --git a/‎graphistry/compute/gfql/same_path/edge_semantics.py‎
Lines changed: 8 additions & 5 deletions b/‎graphistry/compute/gfql/same_path/edge_semantics.py‎
Lines changed: 8 additions & 5 deletions
@@ -3,13 +3,19 @@
 Contains pure functions for building edge pairs and computing BFS reachability.
 """
 
-from typing import Any, Set
-
-import pandas as pd
+from typing import Any, Sequence
 
 from graphistry.compute.typing import DataFrameT
 from .edge_semantics import EdgeSemantics
-from .df_utils import concat_frames, df_cons
+from .df_utils import (
+    concat_frames,
+    series_values,
+    domain_from_values,
+    domain_diff,
+    domain_union,
+    domain_is_empty,
+    domain_to_frame,
+)
 
 
 def build_edge_pairs(
@@ -23,23 +29,22 @@ def build_edge_pairs(
     For undirected edges, both directions are included.
     For directed edges, direction follows sem.join_cols().
     """
-    is_cudf = edges_df.__class__.__module__.startswith("cudf")
     if sem.is_undirected:
         fwd = edges_df[[src_col, dst_col]].copy()
-        fwd.columns = pd.Index(['__from__', '__to__'])
+        fwd.columns = ['__from__', '__to__']
         rev = edges_df[[dst_col, src_col]].copy()
-        rev.columns = pd.Index(['__from__', '__to__'])
+        rev.columns = ['__from__', '__to__']
         result = concat_frames([fwd, rev])
         return result.drop_duplicates() if result is not None else fwd.iloc[:0]
     else:
         join_col, result_col = sem.join_cols(src_col, dst_col)
         pairs = edges_df[[join_col, result_col]].copy()
-        pairs.columns = pd.Index(['__from__', '__to__'])
+        pairs.columns = ['__from__', '__to__']
         return pairs
 
 
 def bfs_reachability(
-    edge_pairs: DataFrameT, start_nodes: Set[Any], max_hops: int, hop_col: str
+    edge_pairs: DataFrameT, start_nodes: Sequence[Any], max_hops: int, hop_col: str
 ) -> DataFrameT:
     """Compute BFS reachability with hop distance tracking.
 
@@ -48,19 +53,18 @@ def bfs_reachability(
 
     Args:
         edge_pairs: DataFrame with ['__from__', '__to__'] columns
-        start_nodes: Set of starting node IDs (hop 0)
+        start_nodes: Starting node domain (hop 0)
         max_hops: Maximum number of hops to traverse
         hop_col: Name for the hop distance column in output
 
     Returns:
         DataFrame with all reachable nodes and their hop distances
     """
-    from .df_utils import series_values
-    import pandas as pd
-
     # Use same DataFrame type as input
-    result = df_cons(edge_pairs, {'__node__': list(start_nodes), hop_col: 0})
-    visited_idx = pd.Index(start_nodes) if not isinstance(start_nodes, pd.Index) else start_nodes
+    start_domain = domain_from_values(start_nodes, edge_pairs)
+    result = domain_to_frame(edge_pairs, start_domain, '__node__')
+    result[hop_col] = 0
+    visited_idx = start_domain
 
     for hop in range(1, max_hops + 1):
         frontier = result[result[hop_col] == hop - 1][['__node__']].rename(columns={'__node__': '__from__'})
@@ -69,14 +73,15 @@ def bfs_reachability(
         next_df = edge_pairs.merge(frontier, on='__from__', how='inner')[['__to__']].drop_duplicates()
         next_df = next_df.rename(columns={'__to__': '__node__'})
 
-        # Filter out already visited nodes using pd.Index operations
+        # Filter out already visited nodes using domain operations
         candidate_nodes = series_values(next_df['__node__'])
-        new_node_ids = candidate_nodes.difference(visited_idx)
-        if len(new_node_ids) == 0:
+        new_node_ids = domain_diff(candidate_nodes, visited_idx)
+        if domain_is_empty(new_node_ids):
             break
 
-        new_nodes = df_cons(edge_pairs, {'__node__': list(new_node_ids), hop_col: hop})
-        visited_idx = visited_idx.union(new_node_ids)
+        new_nodes = domain_to_frame(edge_pairs, new_node_ids, '__node__')
+        new_nodes[hop_col] = hop
+        visited_idx = domain_union(visited_idx, new_node_ids)
 
         result = concat_frames([result, new_nodes])
         if result is None:
 
@@ -3,13 +3,25 @@
 Contains pure functions for series/dataframe operations used across the executor.
 """
 
-from typing import Any, Optional, Sequence, Set
+from typing import Any, Optional, Sequence
 
 import pandas as pd
 
 from graphistry.compute.typing import DataFrameT
 
 
+def _is_cudf_obj(obj: Any) -> bool:
+    """Report whether ``obj`` is an instance of a class defined under ``cudf``.
+
+    The decision is made purely from the class's ``__module__`` string, so
+    cuDF itself is never imported here.
+
+    Args:
+        obj: Any object (``None`` is fine and yields ``False``).
+
+    Returns:
+        True when ``obj.__class__.__module__`` starts with ``"cudf"``.
+    """
+    if not hasattr(obj, "__class__"):
+        return False
+    owning_module = obj.__class__.__module__
+    return owning_module.startswith("cudf")
+
+
+def _cudf_index_op(left: Any, right: Any, op: str) -> Any:
+    """Invoke the set operation named ``op`` on ``left`` with ``right``.
+
+    The operation is first attempted with ``sort=False``; if that call raises
+    ``TypeError`` it is retried without the ``sort`` keyword.
+
+    Args:
+        left: Object exposing a method called ``op``.
+        right: Operand handed to that method.
+        op: Method name to look up on ``left`` (e.g. ``"union"``).
+
+    Returns:
+        Whatever the looked-up method returns.
+    """
+    bound_op = getattr(left, op)
+    try:
+        outcome = bound_op(right, sort=False)
+    except TypeError:
+        # Retry for implementations whose signature has no ``sort`` keyword.
+        outcome = bound_op(right)
+    return outcome
+
+
 def df_cons(template_df: DataFrameT, data: dict) -> DataFrameT:
     """Construct a DataFrame of the same type as template_df.
 
@@ -59,26 +71,99 @@ def series_unique(series: Any) -> Any:
 
     For set operations (intersection, union), use series_values() instead.
     """
+    if _is_cudf_obj(series):
+        return series.dropna().unique()
+    if isinstance(series, pd.Index):
+        return series.dropna().unique()
     if hasattr(series, 'dropna'):
         return series.dropna().unique()
     pandas_series = to_pandas_series(series)
     return pandas_series.dropna().unique()
 
 
-def series_values(series: Any) -> pd.Index:
-    """Extract unique non-null values from a series as a pd.Index.
-
-    Returns pd.Index which supports:
-    - .intersection() for & operations
-    - .union() for | operations
-    - Direct use in .isin() (no conversion needed)
+def series_values(series: Any) -> Any:
+    """Extract unique non-null values from a series as an Index-like domain.
 
-    This is ~9x faster than the previous set-based approach.
+    Returns a pandas.Index for pandas objects, and cudf.Index for cuDF objects.
+    These Index types support .intersection/.union/.difference and are safe to
+    pass into .isin() without host syncs.
     """
+    # cuDF input: keep the result as a cudf.Index rather than converting to pandas.
+    if _is_cudf_obj(series):
+        import cudf  # type: ignore
+        if isinstance(series, cudf.Index):
+            return series.dropna().unique()
+        return cudf.Index(series.dropna().unique())
+    # Already a pandas Index: drop nulls and duplicates directly on it.
+    if isinstance(series, pd.Index):
+        return series.dropna().unique()
+    # Anything else goes through to_pandas_series() before being wrapped in pd.Index.
     pandas_series = to_pandas_series(series)
     return pd.Index(pandas_series.dropna().unique())
 
 
+def domain_empty(template: Optional[Any] = None) -> Any:
+    """Build an empty Index-like domain on the same backend as ``template``.
+
+    Args:
+        template: Optional object used only to pick the backend. When
+            ``_is_cudf_obj`` reports it as a cuDF object the result is a
+            ``cudf.Index``; otherwise (including ``None``) a ``pandas.Index``.
+
+    Returns:
+        An empty ``cudf.Index`` or ``pandas.Index``.
+    """
+    if not _is_cudf_obj(template):
+        return pd.Index([])
+    # Imported lazily so pandas-only environments never need cuDF installed.
+    import cudf  # type: ignore
+    return cudf.Index([])
+
+
+def domain_is_empty(domain: Any) -> bool:
+    """Tell whether ``domain`` holds no values.
+
+    Args:
+        domain: ``None`` or any object supporting ``len()``.
+
+    Returns:
+        True for ``None`` and for zero-length domains, False otherwise.
+    """
+    if domain is None:
+        return True
+    # Compare the length explicitly: Index objects do not define plain truthiness.
+    return len(domain) == 0
+
+
+def domain_from_values(values: Any, template: Optional[Any] = None) -> Any:
+    """Coerce ``values`` into an Index-like domain.
+
+    Existing ``pandas.Index`` / ``cudf.Index`` inputs are returned unchanged.
+    Other inputs are wrapped in a ``cudf.Index`` when either ``values`` or
+    ``template`` is a cuDF object, and in a ``pandas.Index`` otherwise.
+
+    Args:
+        values: ``None``, an Index, or any collection of node IDs.
+        template: Optional object that selects the backend for non-Index,
+            non-cuDF ``values`` and for the empty result.
+
+    Returns:
+        An Index-like domain; empty (per ``domain_empty(template)``) when
+        ``values`` is ``None`` or has length zero.
+    """
+    if domain_is_empty(values):
+        return domain_empty(template)
+    values_are_cudf = _is_cudf_obj(values)
+    if not values_are_cudf and isinstance(values, pd.Index):
+        return values
+    if values_are_cudf or _is_cudf_obj(template):
+        import cudf  # type: ignore
+        if values_are_cudf and isinstance(values, cudf.Index):
+            return values
+        return cudf.Index(values)
+    return pd.Index(values)
+
+
+def domain_intersect(left: Any, right: Any) -> Any:
+    """Intersect two Index-like domains.
+
+    Args:
+        left: Left domain (``None`` is treated as empty).
+        right: Right domain (``None`` is treated as empty).
+
+    Returns:
+        An empty domain when either side is empty (backend taken from
+        ``left`` unless it is ``None``, else from ``right``); otherwise the
+        result of ``left``'s ``intersection`` with ``right``.
+    """
+    if domain_is_empty(left) or domain_is_empty(right):
+        backend_hint = right if left is None else left
+        return domain_empty(backend_hint)
+    # pandas Index is checked first; only non-pandas cuDF objects use the wrapper.
+    via_cudf = not isinstance(left, pd.Index) and _is_cudf_obj(left)
+    if via_cudf:
+        return _cudf_index_op(left, right, "intersection")
+    return left.intersection(right)
+
+
+def domain_union(left: Any, right: Any) -> Any:
+    """Union two Index-like domains.
+
+    Args:
+        left: Left domain (``None`` is treated as empty).
+        right: Right domain (``None`` is treated as empty).
+
+    Returns:
+        ``right`` when ``left`` is empty, ``left`` when ``right`` is empty,
+        otherwise the result of ``left``'s ``union`` with ``right``.
+    """
+    if domain_is_empty(left):
+        return right
+    if domain_is_empty(right):
+        return left
+    # pandas Index is checked first; only non-pandas cuDF objects use the wrapper.
+    via_cudf = not isinstance(left, pd.Index) and _is_cudf_obj(left)
+    if via_cudf:
+        return _cudf_index_op(left, right, "union")
+    return left.union(right)
+
+
+def domain_diff(left: Any, right: Any) -> Any:
+    """Remove the members of ``right`` from ``left``.
+
+    Args:
+        left: Domain to subtract from (``None`` is treated as empty).
+        right: Domain whose members are removed (``None`` is treated as empty).
+
+    Returns:
+        ``left`` itself, unchanged, when either side is empty; otherwise the
+        result of ``left``'s ``difference`` with ``right``.
+    """
+    nothing_to_remove = domain_is_empty(left) or domain_is_empty(right)
+    if nothing_to_remove:
+        return left
+    # pandas Index is checked first; only non-pandas cuDF objects use the wrapper.
+    via_cudf = not isinstance(left, pd.Index) and _is_cudf_obj(left)
+    if via_cudf:
+        return _cudf_index_op(left, right, "difference")
+    return left.difference(right)
+
+
+def domain_to_frame(template_df: DataFrameT, domain: Any, col: str) -> DataFrameT:
+    """Materialize a domain as a one-column DataFrame via ``df_cons``.
+
+    Args:
+        template_df: DataFrame passed to ``df_cons`` as the type template.
+        domain: Index-like domain of values, or ``None`` for no values.
+        col: Name of the single output column.
+
+    Returns:
+        The DataFrame built by ``df_cons`` with ``domain`` under ``col``
+        (an empty list is used when ``domain`` is ``None``).
+    """
+    column_values = [] if domain is None else domain
+    return df_cons(template_df, {col: column_values})
+
+
 # Standard column name for ID DataFrames used in semi-joins
 _ID_COL = "__id__"
 
 
@@ -4,10 +4,10 @@
 """
 
 from dataclasses import dataclass
-from typing import Tuple, TYPE_CHECKING
+from typing import Any, Tuple, TYPE_CHECKING
 
 from graphistry.compute.ast import ASTEdge
-from .df_utils import series_values
+from .df_utils import series_values, domain_union
 
 if TYPE_CHECKING:
     pass
@@ -96,7 +96,7 @@ def endpoint_cols(self, src_col: str, dst_col: str) -> Tuple[str, str]:
 
     def start_nodes(
         self, edges_df, src_col: str, dst_col: str
-    ) -> set:
+    ) -> Any:
         """Get starting nodes for edge traversal (for backward propagation).
 
         For forward: returns src nodes (where traversal starts)
@@ -109,10 +109,13 @@ def start_nodes(
             dst_col: Destination column name
 
         Returns:
-            pd.Index of node IDs where traversal starts
+            Index-like domain of node IDs where traversal starts
         """
         if self.is_undirected:
-            return series_values(edges_df[src_col]).union(series_values(edges_df[dst_col]))
+            return domain_union(
+                series_values(edges_df[src_col]),
+                series_values(edges_df[dst_col]),
+            )
         elif self.is_reverse:
             return series_values(edges_df[dst_col])
         else: