Skip to content
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 128 additions & 0 deletions examples/manipulation-demo-v2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
# Copyright (C) 2024 Robotec.AI
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import logging
from typing import List

import rclpy
import rclpy.qos
from langchain_core.messages import BaseMessage, HumanMessage
from langchain_core.tools import BaseTool
from rai import get_llm_model
from rai.agents.langchain.core import create_conversational_agent
from rai.communication.ros2 import wait_for_ros2_services, wait_for_ros2_topics
from rai.communication.ros2.connectors import ROS2Connector
from rai.tools.ros2.detection.pcl import (
GrippingPointEstimatorConfig,
PointCloudFilterConfig,
PointCloudFromSegmentationConfig,
)
from rai.tools.ros2.detection.tools import GetGrippingPointTool
from rai.tools.ros2.manipulation import (
MoveObjectFromToTool,
ResetArmTool,
)
from rai.tools.ros2.simple import GetROS2ImageConfiguredTool

from rai_whoami.models import EmbodimentInfo

logger = logging.getLogger(__name__)


def create_agent():
    """Build the conversational manipulation agent used by this demo.

    Initializes rclpy, creates a single-threaded ``ROS2Connector``, waits for
    the required services and topics, declares the gripping-point parameters
    on the connector's node, and assembles the detection, manipulation and
    camera tools into a conversational agent.

    Returns:
        The agent object returned by ``create_conversational_agent``.
    """
    rclpy.init()
    connector = ROS2Connector(executor_type="single_threaded")

    # Wait for the required services and topics before building the tools.
    wait_for_ros2_services(
        connector,
        ["/grounded_sam_segment", "/grounding_dino_classify"],
    )
    wait_for_ros2_topics(
        connector,
        ["/color_image5", "/depth_image5", "/color_camera_info5"],
    )

    ros_node = connector.node

    # Parameters consumed by GetGrippingPointTool. They can alternatively be
    # provided from a launch file or at runtime (timeout_sec is handled by the
    # tool internally, so it is not declared here).
    gripping_point_parameters = {
        "conversion_ratio": 1.0,
        "detection_tools.gripping_point.target_frame": "panda_link0",
        "detection_tools.gripping_point.source_frame": "RGBDCamera5",
        "detection_tools.gripping_point.camera_topic": "/color_image5",
        "detection_tools.gripping_point.depth_topic": "/depth_image5",
        "detection_tools.gripping_point.camera_info_topic": "/color_camera_info5",
    }
    for name, default in gripping_point_parameters.items():
        ros_node.declare_parameter(name, default)

    # Gripping point detection pipeline: segmentation -> estimation -> filtering
    # configs, passed to the tool in that order.
    gripping_point_tool = GetGrippingPointTool(
        connector=connector,
        segmentation_config=PointCloudFromSegmentationConfig(
            box_threshold=0.35,
            text_threshold=0.45,
        ),
        estimator_config=GrippingPointEstimatorConfig(
            # Options: "centroid", "top_plane", "biggest_plane"
            strategy="biggest_plane",
            top_percentile=0.05,
            plane_bin_size_m=0.01,
            ransac_iterations=200,
            distance_threshold_m=0.01,
            min_points=10,
        ),
        filter_config=PointCloudFilterConfig(
            strategy="dbscan",
            min_points=20,
            dbscan_eps=0.02,
            dbscan_min_samples=10,
        ),
    )
    move_tool = MoveObjectFromToTool(
        connector=connector, manipulator_frame="panda_link0"
    )
    reset_tool = ResetArmTool(connector=connector, manipulator_frame="panda_link0")
    image_tool = GetROS2ImageConfiguredTool(
        connector=connector, topic="/color_image5"
    )
    toolbox: List[BaseTool] = [
        gripping_point_tool,
        move_tool,
        reset_tool,
        image_tool,
    ]

    model = get_llm_model(model_type="complex_model", streaming=True)
    embodiment = EmbodimentInfo.from_file(
        "examples/embodiments/manipulation_embodiment.json"
    )
    return create_conversational_agent(
        llm=model,
        tools=toolbox,
        system_prompt=embodiment.to_langchain(),
    )


def main():
    """Run an interactive prompt loop against the manipulation agent.

    Every prompt typed by the user is appended to a running message history,
    the agent is invoked with that history, and the last message of the
    agent's output is pretty-printed.

    The loop ends when input is closed (EOF, e.g. Ctrl-D) or interrupted
    (Ctrl-C) while waiting at the prompt, so the demo can be exited without
    a traceback.
    """
    agent = create_agent()
    messages: List[BaseMessage] = []

    while True:
        try:
            prompt = input("Enter a prompt: ")
        except (EOFError, KeyboardInterrupt):
            # The user closed or interrupted the prompt: finish the current
            # terminal line and leave the loop instead of crashing.
            print()
            break
        messages.append(HumanMessage(content=prompt))
        output = agent.invoke({"messages": messages})
        output["messages"][-1].pretty_print()


# Start the interactive demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ build-backend = "poetry.core.masonry.api"
markers = [
"billable: marks test as billable (deselect with '-m \"not billable\"')",
"ci_only: marks test as cli only (deselect with '-m \"not ci_only\"')",
"manual: marks tests as manual (may require demo app to be running)",
]
addopts = "-m 'not billable and not ci_only' --ignore=src"
addopts = "-m 'not billable and not ci_only and not manual' --ignore=src"
log_cli = true
log_cli_level = "INFO"
2 changes: 2 additions & 0 deletions src/rai_core/rai/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
get_llm_model_direct,
get_tracing_callbacks,
)
from .tools import timeout

__all__ = [
"AgentRunner",
Expand All @@ -29,4 +30,5 @@
"get_llm_model_config_and_vendor",
"get_llm_model_direct",
"get_tracing_callbacks",
"timeout",
]
3 changes: 3 additions & 0 deletions src/rai_core/rai/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .timeout import timeout as timeout
from .timeout import timeout_method as timeout_method
21 changes: 21 additions & 0 deletions src/rai_core/rai/tools/ros2/detection/__init__.py
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great work, I really like the new tools.
I am wondering if we should keep these tools and pipelines in rai-core or move them to rai_extensions/rai_openset_detection, since they come with extra dependencies.

The plan is to publish rai_openset_detection on PyPI, so it would be valuable to keep this code there. By "keep" I mean we should remove the old implementation and replace it with the new one.

Copy link
Collaborator Author

@Juliaj Juliaj Sep 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are good points! I agree that rai_extensions would be a better location for these tools since they depend on GDINO and GSAM services. I also just had a good look at segmentation_tools.py and noticed there's some overlap between that file in rai_open_set_vision and the new detection pipeline.

A few questions to help clarify the approach:

  • Has rai_open_set_vision been released to PyPI? I couldn't find it there yet.
  • When you mention "remove the old implementation and replace it with the new one," are you suggesting we consolidate the code in segmentation_tools.py with the new detection pipeline, or would you prefer to release it as a separate rai_openset_detection package as you mentioned?

Do you have a preference for whether this consolidation should be done in this PR or handled separately?

Copy link
Member

@maciejmajek maciejmajek Sep 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it hasn't. The main blocker is a git+ python dependency (pypi does not allow packages with git dependencies)

When you mention "remove the old implementation a...

iirc there are four tools in the rai_open_set_vision. One of them is GetGrabbingPointTool, which uses the old api. This one should be replaced with your implementation. (we can of course leave it and add your implementation + mark the previous one as deprecated)

So, the contents of detection directory should be moved to the open set package.
When we have that the next steps would be (for the rai_open_set_vision package):

  • Remove ROS 2 configuration files
  • Remove git dependencies
  • Rename the package to something a little bit more graceful
  • Publish to pypi

Do you have a preference for whether this consolidation should be done in this PR or handled separately?

Preferably in this PR, we are using semver for rai-core versioning and removing features will introduce breaking changes (major bump).

Copy link
Collaborator Author

@Juliaj Juliaj Sep 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@maciejmajek, the work of merging the 3D detection pipeline code into the rai_open_set_vision package has been completed. Instead of modifying segmentation_tools.py directly, I added the 3D detection pipeline as a new tool so that we can gradually migrate to it. Please review when you have a chance. Please also let me know whether we should update the manipulation demo to -v2.

To reduce the burden of the PR review and keep the code changes more manageable, I propose we address the rai_open_set_vision package renaming and documentation updates in future PRs if you're okay with that approach.

Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Copyright (C) 2025 Robotec.AI
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .tools import (
GetGrippingPointTool,
)

# Public names of this package; GetGrippingPointTool is re-exported from
# the sibling ``.tools`` module imported above.
__all__ = [
    "GetGrippingPointTool",
]
Loading
Loading