Skip to content

Commit ffe0de3

Browse files
Anthropic accepts messages with images (#858)
* Anthropic accepts messages with images * Code comments * Fix code scanning alert no. 4: Use of `Kernel.open` or `IO.read` or similar sinks with a non-constant value Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> * Update anthropic_message.rb * Code comments and CHANGELOG * Revert to URI.open() * Fixes --------- Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
1 parent afffe92 commit ffe0de3

File tree

10 files changed

+262
-60
lines changed

10 files changed

+262
-60
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
- [SECURITY]: A change which fixes a security vulnerability.
1111

1212
## [Unreleased]
13+
- [FEATURE] [https://github.com/patterns-ai-core/langchainrb/pull/858] Assistant now accepts an `image_url` in messages when using Anthropic.
1314

1415
## [0.19.0] - 2024-10-23
1516
- [BREAKING] [https://github.com/patterns-ai-core/langchainrb/pull/840] Rename `chat_completion_model_name` parameter to `chat_model` in Langchain::LLM parameters.

lib/langchain/assistant/llm/adapters/anthropic.rb

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,7 @@ def build_chat_params(
3838
# @param tool_call_id [String] The tool call ID
3939
# @return [Messages::AnthropicMessage] The Anthropic message
4040
def build_message(role:, content: nil, image_url: nil, tool_calls: [], tool_call_id: nil)
41-
Langchain.logger.warn "WARNING: Image URL is not supported by Anthropic currently" if image_url
42-
43-
Messages::AnthropicMessage.new(role: role, content: content, tool_calls: tool_calls, tool_call_id: tool_call_id)
41+
Messages::AnthropicMessage.new(role: role, content: content, image_url: image_url, tool_calls: tool_calls, tool_call_id: tool_call_id)
4442
end
4543

4644
# Extract the tool call information from the Anthropic tool call hash

lib/langchain/assistant/messages/anthropic_message.rb

Lines changed: 82 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,20 @@ class AnthropicMessage < Base
1818
# @param content [String] The content of the message
1919
# @param tool_calls [Array<Hash>] The tool calls made in the message
2020
# @param tool_call_id [String] The ID of the tool call
21-
def initialize(role:, content: nil, tool_calls: [], tool_call_id: nil)
21+
def initialize(
22+
role:,
23+
content: nil,
24+
image_url: nil,
25+
tool_calls: [],
26+
tool_call_id: nil
27+
)
2228
raise ArgumentError, "Role must be one of #{ROLES.join(", ")}" unless ROLES.include?(role)
2329
raise ArgumentError, "Tool calls must be an array of hashes" unless tool_calls.is_a?(Array) && tool_calls.all? { |tool_call| tool_call.is_a?(Hash) }
2430

2531
@role = role
2632
# Some Tools return content as a JSON hence `.to_s`
2733
@content = content.to_s
34+
@image_url = image_url
2835
@tool_calls = tool_calls
2936
@tool_call_id = tool_call_id
3037
end
@@ -33,25 +40,83 @@ def initialize(role:, content: nil, tool_calls: [], tool_call_id: nil)
3340
#
3441
# @return [Hash] The message as an Anthropic API-compatible hash
3542
def to_hash
36-
{}.tap do |h|
37-
h[:role] = tool? ? "user" : role
38-
39-
h[:content] = if tool?
40-
[
41-
{
42-
type: "tool_result",
43-
tool_use_id: tool_call_id,
44-
content: content
45-
}
46-
]
47-
elsif tool_calls.any?
48-
tool_calls
49-
else
50-
content
51-
end
43+
if assistant?
44+
assistant_hash
45+
elsif tool?
46+
tool_hash
47+
elsif user?
48+
user_hash
5249
end
5350
end
5451

52+
# Convert the message to an Anthropic API-compatible hash
53+
#
54+
# @return [Hash] The message as an Anthropic API-compatible hash, with the role as "assistant"
55+
def assistant_hash
  # Tool calls, when present, take the place of the plain text content.
  payload = tool_calls.any? ? tool_calls : content

  {role: "assistant", content: payload}
end
65+
66+
# Convert the message to an Anthropic API-compatible hash
67+
#
68+
# @return [Hash] The message as an Anthropic API-compatible hash, with the role as "user"
69+
def tool_hash
  # TODO: Tool can also return images
  # https://docs.anthropic.com/en/docs/build-with-claude/tool-use#handling-tool-use-and-tool-result-content-blocks
  tool_result = {
    type: "tool_result",
    tool_use_id: tool_call_id,
    content: content
  }

  # The tool result is sent back under the "user" role as a single content part.
  {role: "user", content: [tool_result]}
end
83+
84+
# Convert the message to an Anthropic API-compatible hash
85+
#
86+
# @return [Hash] The message as an Anthropic API-compatible hash, with the role as "user"
87+
def user_hash
  # The content is always an array of typed parts, never a bare string.
  content_parts = build_content_array

  {role: "user", content: content_parts}
end
93+
94+
# Builds the content value for the message hash
95+
# @return [Array<Hash>] An array of content hashes
96+
def build_content_array
  parts = []

  # A text part is only emitted when there is something to say.
  parts << {type: "text", text: content} if content && !content.empty?

  # Resolve `image` exactly once so the encoded data and the media type are
  # read from the same object instead of from separately constructed ones.
  attachment = image
  if attachment
    parts << {
      type: "image",
      source: {
        type: "base64",
        data: attachment.base64,
        media_type: attachment.mime_type
      }
    }
  end

  parts
end
119+
55120
# Check if the message is a tool call
56121
#
57122
# @return [Boolean] true/false whether this message is a tool call

lib/langchain/assistant/messages/base.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ def standard_role
5050
# TODO: Should we return :unknown or raise an error?
5151
:unknown
5252
end
53+
54+
# Wraps the message's image URL, if any, in a Utils::ImageWrapper.
#
# The wrapper is memoized so that repeated calls on the same message share
# one instance (and whatever that instance has already loaded) instead of
# constructing a new wrapper every time.
#
# @return [Utils::ImageWrapper, nil] the wrapper, or nil when no image_url is set
def image
  return unless image_url

  @image ||= Utils::ImageWrapper.new(image_url)
end
5357
end
5458
end
5559
end
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# frozen_string_literal: true
2+
3+
require "open-uri"
4+
5+
module Langchain
  module Utils
    # Fetches an image from an http(s) URL and exposes it as Base64-encoded
    # data together with its media type.
    class ImageWrapper
      attr_reader :image_url

      # @param image_url [String] http or https URL of the image
      def initialize(image_url)
        @image_url = image_url
      end

      # Returns the image body Base64-encoded. The body is read once, the
      # underlying stream is closed afterwards, and the result is cached.
      #
      # @return [String] strict (no line feeds) Base64 encoding of the image bytes
      # @raise [URI::InvalidURIError] if the URL cannot be parsed or is not http/https
      def base64
        @base64 ||= encode_and_release(open_image)
      end

      # Returns the media type reported by the Content-Type response header,
      # without any parameters (e.g. "image/jpeg; charset=binary" => "image/jpeg").
      #
      # @return [String, nil] lowercase media type, or nil when the header is absent
      # @raise [URI::InvalidURIError] if the URL cannot be parsed or is not http/https
      def mime_type
        # TODO: Make it work with local files
        header = open_image.meta["content-type"]
        header&.split(";", 2)&.first&.strip&.downcase
      end

      private

      # Reads the whole stream, encodes it and always closes the stream so the
      # descriptor / temp file is not held until garbage collection. The header
      # metadata read by #mime_type is not affected by closing.
      def encode_and_release(stream)
        # "m0" is the strict Base64 pack directive; it yields the same output
        # as Base64.strict_encode64 without needing the base64 library loaded.
        [stream.read].pack("m0")
      ensure
        stream.close unless stream.closed?
      end

      # Opens the remote image once and memoizes the resulting stream.
      # Only http and https URLs are accepted.
      def open_image
        # TODO: Make it work with local files
        return @open_image if @open_image

        uri = URI.parse(image_url)
        raise URI::InvalidURIError, "Invalid URL scheme" unless %w[http https].include?(uri.scheme)

        @open_image = URI.open(image_url) # rubocop:disable Security/Open
      end
    end
  end
end
562 KB
Loading

spec/langchain/assistant/assistant_spec.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1146,7 +1146,7 @@
11461146
before do
11471147
allow(subject.llm).to receive(:chat)
11481148
.with(
1149-
messages: [{role: "user", content: "Please calculate 2+2"}],
1149+
messages: [{role: "user", content: [{text: "Please calculate 2+2", type: "text"}]}],
11501150
tools: calculator.class.function_schemas.to_anthropic_format,
11511151
tool_choice: {disable_parallel_tool_use: false, type: "auto"},
11521152
system: instructions
@@ -1191,7 +1191,7 @@
11911191
allow(subject.llm).to receive(:chat)
11921192
.with(
11931193
messages: [
1194-
{role: "user", content: "Please calculate 2+2"},
1194+
{role: "user", content: [{text: "Please calculate 2+2", type: "text"}]},
11951195
{role: "assistant", content: [
11961196
{
11971197
"type" => "tool_use",

spec/langchain/assistant/messages/anthropic_message_spec.rb

Lines changed: 92 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -6,53 +6,107 @@
66
end
77

88
describe "#to_hash" do
9-
it "returns function" do
10-
message = described_class.new(role: "tool_result", content: "4.0", tool_call_id: "toolu_014eSx9oBA5DMe8gZqaqcJ3H")
11-
expect(message.to_hash).to eq(
12-
{
13-
role: "user",
9+
context "when role is assistant" do
10+
let(:role) { "assistant" }
11+
12+
it "returns assistant_hash" do
13+
message = described_class.new(role: role, content: "Hello, how can I help you?")
14+
expect(message).to receive(:assistant_hash).and_call_original
15+
expect(message.to_hash).to eq(
16+
role: role,
17+
content: "Hello, how can I help you?"
18+
)
19+
end
20+
21+
it "returns assistant_hash with tool_calls" do
22+
message = described_class.new(
23+
role: role,
24+
tool_calls: [
25+
{
26+
"type" => "tool_use",
27+
"id" => "toolu_01UEciZACvRZ6S4rqAwD1syH",
28+
"name" => "news_retriever__get_everything",
29+
"input" => {
30+
"q" => "Google I/O 2024",
31+
"sort_by" => "publishedAt",
32+
"language" => "en"
33+
}
34+
}
35+
]
36+
)
37+
expect(message.to_hash).to eq(
38+
role: role,
1439
content: [
1540
{
16-
type: "tool_result",
17-
tool_use_id: "toolu_014eSx9oBA5DMe8gZqaqcJ3H",
18-
content: "4.0"
41+
"type" => "tool_use",
42+
"id" => "toolu_01UEciZACvRZ6S4rqAwD1syH",
43+
"name" => "news_retriever__get_everything",
44+
"input" => {
45+
"q" => "Google I/O 2024",
46+
"sort_by" => "publishedAt",
47+
"language" => "en"
48+
}
1949
}
2050
]
21-
}
22-
)
51+
)
52+
end
2353
end
2454

25-
it "returns tool_calls" do
26-
message = described_class.new(
27-
role: "assistant",
28-
tool_calls: [
55+
context "when role is tool_result" do
56+
let(:message) { described_class.new(role: "tool_result", content: "4.0", tool_call_id: "toolu_014eSx9oBA5DMe8gZqaqcJ3H") }
57+
58+
it "returns tool_hash" do
59+
expect(message).to receive(:tool_hash).and_call_original
60+
expect(message.to_hash).to eq(
2961
{
30-
"type" => "tool_use",
31-
"id" => "toolu_01UEciZACvRZ6S4rqAwD1syH",
32-
"name" => "news_retriever__get_everything",
33-
"input" => {
34-
"q" => "Google I/O 2024",
35-
"sort_by" => "publishedAt",
36-
"language" => "en"
37-
}
62+
role: "user",
63+
content: [
64+
{
65+
type: "tool_result",
66+
tool_use_id: "toolu_014eSx9oBA5DMe8gZqaqcJ3H",
67+
content: "4.0"
68+
}
69+
]
3870
}
39-
]
40-
)
41-
expect(message.to_hash).to eq(
42-
role: "assistant",
43-
content: [
44-
{
45-
"type" => "tool_use",
46-
"id" => "toolu_01UEciZACvRZ6S4rqAwD1syH",
47-
"name" => "news_retriever__get_everything",
48-
"input" => {
49-
"q" => "Google I/O 2024",
50-
"sort_by" => "publishedAt",
51-
"language" => "en"
71+
)
72+
end
73+
end
74+
75+
context "when role is user" do
76+
let(:role) { "user" }
77+
78+
it "returns user_hash" do
79+
message = described_class.new(role: role, content: "Hello, how can I help you?")
80+
expect(message).to receive(:user_hash).and_call_original
81+
expect(message.to_hash).to eq(
82+
role: role,
83+
content: [
84+
{
85+
type: "text",
86+
text: "Hello, how can I help you?"
5287
}
53-
}
54-
]
55-
)
88+
]
89+
)
90+
end
91+
92+
it "returns user_hash with image_url" do
93+
message = described_class.new(role: role, image_url: "https://example.com/image.jpg")
94+
allow(message).to receive(:image).and_return(double(base64: "base64_data", mime_type: "image/jpeg"))
95+
96+
expect(message.to_hash).to eq(
97+
role: role,
98+
content: [
99+
{
100+
type: "image",
101+
source: {
102+
type: "base64",
103+
data: "base64_data",
104+
media_type: "image/jpeg"
105+
}
106+
}
107+
]
108+
)
109+
end
56110
end
57111
end
58112
end
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# frozen_string_literal: true
2+
3+
RSpec.describe Langchain::Assistant::Messages::Base do
  # Each role predicate is expected to raise NotImplementedError when called
  # on the base class itself.
  %w[tool? system? llm?].each do |predicate|
    describe predicate do
      it "raises an error" do
        expect { described_class.new.public_send(predicate) }.to raise_error(NotImplementedError)
      end
    end
  end
end
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# frozen_string_literal: true
2+
3+
RSpec.describe Langchain::Utils::ImageWrapper do
  subject { described_class.new(image_url) }

  let(:image_url) { "https://example.com/sf-cable-car.jpeg" }
  let(:fixture_path) { "./spec/fixtures/loaders/sf-cable-car.jpeg" }
  let(:image_data) { File.binread(fixture_path) }

  # Stand-in for the stream URI.open returns: an in-memory IO that also
  # answers `meta`, so no file handle is left open and #mime_type can be
  # exercised (a plain File has no `meta`).
  let(:remote_image) do
    StringIO.new(image_data).tap do |io|
      io.define_singleton_method(:meta) { {"content-type" => "image/jpeg"} }
    end
  end

  before do
    allow(URI).to receive(:open).with(image_url).and_return(remote_image)
  end

  describe "#base64" do
    it "returns the image as a base64 string" do
      expect(subject.base64).to eq(Base64.strict_encode64(image_data))
    end
  end

  describe "#mime_type" do
    it "returns the mime type of the image" do
      expect(subject.mime_type).to eq("image/jpeg")
    end
  end
end

0 commit comments

Comments
 (0)