118 changes: 118 additions & 0 deletions examples/generate_image.rb
@@ -0,0 +1,118 @@
# frozen_string_literal: true

# Example demonstrating Langchain's unified image generation API across providers
#
# Prerequisites (set any of these):
# export OPENAI_API_KEY="your_api_key"
# export GOOGLE_GEMINI_API_KEY="your_api_key"
# export GOOGLE_VERTEX_AI_PROJECT_ID="your_project_id"
#
# Run with:
# bundle exec ruby examples/generate_image.rb

require "bundler/inline"

# Declare dependencies inline so the example runs standalone;
# bundler/inline installs any gems that are missing locally
gemfile(true) do
source "https://rubygems.org"
gem "ruby-openai", ">= 6.3"
gem "googleauth" # For Google Vertex AI
gem "langchainrb", path: File.expand_path("..", __dir__)
end

require "langchainrb"
require "base64"

# Build array of available LLM providers based on environment variables
llms = []

# OpenAI
if ENV["OPENAI_API_KEY"]
llms << {
name: "OpenAI DALL-E 3",
instance: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"]),
options: {size: "1024x1024"}
}
end

# Google Gemini
if ENV["GOOGLE_GEMINI_API_KEY"]
llms << {
name: "Google Gemini",
instance: Langchain::LLM::GoogleGemini.new(api_key: ENV["GOOGLE_GEMINI_API_KEY"]),
options: {n: 1}
}
end

# Google Vertex AI
if ENV["GOOGLE_VERTEX_AI_PROJECT_ID"]
region = ENV.fetch("GOOGLE_VERTEX_AI_REGION", "us-central1")
llms << {
name: "Google Vertex AI (Imagen)",
instance: Langchain::LLM::GoogleVertexAI.new(
project_id: ENV["GOOGLE_VERTEX_AI_PROJECT_ID"],
region: region
),
options: {n: 1}
}
end

if llms.empty?
puts "No LLM providers configured. Please set at least one of:"
puts " - OPENAI_API_KEY"
puts " - GOOGLE_GEMINI_API_KEY"
puts " - GOOGLE_VERTEX_AI_PROJECT_ID"
exit 1
end

# Common prompt for all providers
PROMPT = "A minimalist illustration of a ruby gemstone on a dark background"

puts "Generating images with prompt: \"#{PROMPT}\""
puts "Using #{llms.length} provider(s)"
puts

# Demonstrate the unified API: the same method call works across all providers
llms.each do |llm_config|
puts "=== #{llm_config[:name]} ==="

begin
# Unified API call - works the same for all providers
response = llm_config[:instance].generate_image(
prompt: PROMPT,
**llm_config[:options]
)

# Handle different response formats
if response.respond_to?(:image_urls) && !response.image_urls.empty?
puts "✓ Generated #{response.image_urls.count} image(s)"
response.image_urls.each_with_index do |url, i|
puts " Image #{i + 1} URL: #{url}"
end
elsif response.respond_to?(:image_base64s) && !response.image_base64s.empty?
puts "✓ Generated #{response.image_base64s.count} image(s)"
response.image_base64s.each_with_index do |data, i|
# Sanitize the provider name (spaces, punctuation) into a safe filename
filename = "#{llm_config[:name].downcase.gsub(/[^a-z0-9]+/, "_").delete_suffix("_")}_image_#{i + 1}.png"
begin
decoded_data = Base64.decode64(data)
File.binwrite(filename, decoded_data)
puts " Image #{i + 1}: Saved to #{filename} (#{decoded_data.bytesize} bytes)"
rescue => e
puts " Image #{i + 1}: Base64 data received (#{data.length} chars) - error saving: #{e.message}"
end
end
else
puts "✗ No images in response"
end
rescue => e
puts "✗ Error: #{e.message}"
end

puts
end

puts "Summary:"
puts "- All providers use the same `generate_image` method"
puts "- Responses provide either `image_urls` or `image_base64s`"
puts "- This unified API makes it easy to switch between providers"
9 changes: 9 additions & 0 deletions lib/langchain/llm/base.rb
Original file line number Diff line number Diff line change
@@ -77,6 +77,15 @@ def summarize(...)
raise NotImplementedError, "#{self.class.name} does not support summarization"
end

#
# Generate an image for a given prompt. Parameters will depend on the LLM provider.
#
# @raise NotImplementedError if not supported by the LLM
#
def generate_image(...)
raise NotImplementedError, "#{self.class.name} does not support image generation"
end

#
# Returns an instance of Langchain::LLM::Parameters::Chat
#
25 changes: 25 additions & 0 deletions lib/langchain/llm/google_gemini.rb
@@ -7,6 +7,7 @@ class GoogleGemini < Base
DEFAULTS = {
chat_model: "gemini-1.5-pro-latest",
embedding_model: "text-embedding-004",
image_generation_model: "gemini-2.0-flash-preview-image-generation",
temperature: 0.0
}

@@ -91,6 +92,30 @@ def embed(
Langchain::LLM::Response::GoogleGeminiResponse.new(parsed_response, model: model)
end

# Generate an image for a given prompt using Gemini Image Generation capability
#
# @param prompt [String] The textual prompt for the desired image
# @param n [Integer] Number of images to generate (candidateCount) (default 1)
# @return [Langchain::LLM::Response::GoogleGeminiResponse] Response wrapper
def generate_image(prompt:, n: 1)
raise ArgumentError, "prompt argument is required" if prompt.to_s.strip.empty?

parameters = {
contents: [{parts: [{text: prompt}]}],
generationConfig: {
responseModalities: ["TEXT", "IMAGE"],
candidateCount: n
}
}

model = @defaults[:image_generation_model]
uri = URI("https://generativelanguage.googleapis.com/v1beta/models/#{model}:generateContent?key=#{api_key}")

parsed_response = http_post(uri, parameters)

Langchain::LLM::Response::GoogleGeminiResponse.new(parsed_response, model: model)
end

private

def http_post(url, params)
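For reviewers, a minimal usage sketch of the new Gemini path (hypothetical prompt; assumes a valid GOOGLE_GEMINI_API_KEY and the default model above):

require "langchainrb"
require "base64"

llm = Langchain::LLM::GoogleGemini.new(api_key: ENV["GOOGLE_GEMINI_API_KEY"])
response = llm.generate_image(prompt: "A minimalist ruby gemstone", n: 1)

# image_base64s pulls the base64 payloads out of the inlineData parts
response.image_base64s.each_with_index do |b64, i|
  File.binwrite("gemini_image_#{i + 1}.png", Base64.decode64(b64))
end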
30 changes: 29 additions & 1 deletion lib/langchain/llm/google_vertexai.rb
@@ -18,7 +18,8 @@ class GoogleVertexAI < Base
top_k: 40,
dimensions: 768,
embedding_model: "textembedding-gecko",
chat_model: "gemini-1.0-pro"
chat_model: "gemini-1.0-pro",
image_generation_model: "imagen-3.0-generate-002"
}.freeze

# Google Cloud has a project id and a specific region of deployment.
@@ -99,6 +100,33 @@ def chat(params = {})
end
end

# Generate images with Imagen model via Vertex AI
#
# @param prompt [String] The text prompt for the image
# @param n [Integer] Number of images to generate (1-4)
# @return [Langchain::LLM::Response::GoogleVertexAIResponse]
def generate_image(prompt:, n: 1)
raise ArgumentError, "prompt argument is required" if prompt.to_s.strip.empty?

params = {
instances: [
{
prompt: prompt
}
],
parameters: {
sampleCount: n
}
}

model = @defaults[:image_generation_model]
uri = URI("#{url}#{model}:predict")

parsed_response = http_post(uri, params)

Langchain::LLM::Response::GoogleVertexAIResponse.new(parsed_response, model: model)
end

private

def http_post(url, params)
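The same call against Vertex AI, sketched under the assumption that application-default credentials are configured; Imagen's sampleCount accepts 1-4, so n maps directly onto that range:

llm = Langchain::LLM::GoogleVertexAI.new(
  project_id: ENV["GOOGLE_VERTEX_AI_PROJECT_ID"],
  region: "us-central1"
)

# Request two images in one call; sampleCount is set from n
response = llm.generate_image(prompt: "A cartoon cat", n: 2)
response.image_base64s # => up to two base64-encoded image strings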
26 changes: 25 additions & 1 deletion lib/langchain/llm/openai.rb
@@ -18,7 +18,8 @@ class OpenAI < Base
DEFAULTS = {
n: 1,
chat_model: "gpt-4o-mini",
embedding_model: "text-embedding-3-small"
embedding_model: "text-embedding-3-small",
image_generation_model: "dall-e-3"
}.freeze

EMBEDDING_SIZES = {
@@ -161,6 +162,29 @@ def summarize(text:)
complete(prompt: prompt)
end

# Generate images for a given prompt using OpenAI Images API
#
# @param prompt [String] Textual prompt describing the desired image
# @param n [Integer] Number of images to generate (default 1)
# @param size [String] Requested resolution, e.g. "1024x1024" (default "1024x1024")
# @return [Langchain::LLM::Response::OpenAIResponse] Wrapper around the raw response
def generate_image(prompt:, n: 1, size: "1024x1024")
raise ArgumentError, "prompt argument is required" if prompt.to_s.strip.empty?

parameters = {
prompt: prompt,
n: n,
size: size,
model: @defaults[:image_generation_model]
}

response = with_api_error_handling do
client.images.generate(parameters: parameters)
end

Langchain::LLM::Response::OpenAIResponse.new(response)
end

def default_dimensions
@defaults[:dimensions] || EMBEDDING_SIZES.fetch(defaults[:embedding_model])
end
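And the OpenAI path, which (unlike the two Google providers) returns hosted URLs rather than inline base64 data; assumes OPENAI_API_KEY is set:

llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
response = llm.generate_image(prompt: "A minimalist ruby gemstone", size: "1024x1024")

# The Images API response carries URLs under the "data" key
response.image_urls.each { |url| puts url }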
11 changes: 11 additions & 0 deletions lib/langchain/llm/response/google_gemini_response.rb
@@ -45,5 +45,16 @@ def completion_tokens
def total_tokens
raw_response.dig("usageMetadata", "totalTokenCount")
end

# Returns array of base64 image data from inline_data parts
def image_base64s
candidates = raw_response["candidates"] || []
candidates.flat_map do |candidate|
parts = candidate.dig("content", "parts") || []
parts.filter_map { |part| part.dig("inlineData", "data") }
end
end

alias_method :image_blobs, :image_base64s
end
end
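For context, this is the response shape image_base64s walks; the inlineData/mimeType field names follow the Gemini REST casing, and the values here are placeholders:

{
  "candidates" => [
    {"content" => {"parts" => [
      {"text" => "optional accompanying text"},
      {"inlineData" => {"mimeType" => "image/png", "data" => "<base64>"}}
    ]}}
  ]
}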
23 changes: 23 additions & 0 deletions lib/langchain/llm/response/google_vertex_ai_response.rb
@@ -0,0 +1,23 @@
# frozen_string_literal: true

module Langchain::LLM::Response
class GoogleVertexAIResponse < BaseResponse
# Imagen responses place image bytes in the predictions list.
# Each prediction may carry the payload under "bytesBase64Encoded" (current Imagen responses), a bare "bytes" key, or nested image keys.
def image_base64s
Array(raw_response["predictions"]).map do |pred|
pred["bytesBase64Encoded"] || pred["bytes"] || pred.dig("image", "image_bytes") || pred.dig("image", "imageBytes")
end.compact
end

alias_method :image_blobs, :image_base64s

# Other methods not supported for image response
def chat_completion; nil; end
def embedding; nil; end
def embeddings; []; end
def prompt_tokens; nil; end
def completion_tokens; nil; end
def total_tokens; nil; end
end
end
7 changes: 7 additions & 0 deletions lib/langchain/llm/response/openai_response.rb
@@ -59,5 +59,12 @@ def completion_tokens
def total_tokens
raw_response.dig("usage", "total_tokens")
end

# Returns an array of image URLs when the response comes from the Image Generation endpoint
#
# @return [Array<String>] list of image URLs or [] if not present
def image_urls
Array(raw_response["data"]).filter_map { |d| d["url"] }
end
end
end
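The corresponding Images API payload that image_urls parses, with placeholder values:

{
  "created" => 1700000000,
  "data" => [
    {"url" => "https://example.com/generated-image.png"}
  ]
}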
6 changes: 6 additions & 0 deletions spec/lib/langchain/llm/base_spec.rb
@@ -48,6 +48,12 @@ def initialize
end
end

describe "#generate_image" do
it "raises an error" do
expect { subject.generate_image }.to raise_error(NotImplementedError)
end
end

describe "#chat_parameters(params = {})" do
subject { TestLLM.new }

34 changes: 34 additions & 0 deletions spec/lib/langchain/llm/google_gemini_image_spec.rb
@@ -0,0 +1,34 @@
# frozen_string_literal: true

RSpec.describe Langchain::LLM::GoogleGemini do
let(:subject) { described_class.new(api_key: "XYZ") }

describe "#generate_image" do
let(:prompt) { "Generate a minimalistic landscape" }
let(:model_id) { "gemini-2.0-flash-preview-image-generation" }
let(:uri) { URI("https://generativelanguage.googleapis.com/v1beta/models/#{model_id}:generateContent?key=XYZ") }
let(:params) do
{
contents: [{parts: [{text: prompt}]}],
generationConfig: {responseModalities: ["TEXT", "IMAGE"], candidateCount: 1}
}
end
let(:api_response) do
{"candidates" => [{"content" => {"parts" => [{"inline_data" => {"data" => "BASE64STRING"}}]}}]}
end

before do
http_response = double("response", body: api_response.to_json)
http = double("http")
allow(http).to receive(:use_ssl=)
allow(http).to receive(:set_debug_output)
allow(http).to receive(:request).and_return(http_response)
allow(Net::HTTP).to receive(:new).and_return(http)
end

it "returns a response wrapper" do
resp = subject.generate_image(prompt: prompt)
expect(resp).to be_a(Langchain::LLM::Response::GoogleGeminiResponse)
end
end
end
32 changes: 32 additions & 0 deletions spec/lib/langchain/llm/google_vertexai_image_spec.rb
@@ -0,0 +1,32 @@
# frozen_string_literal: true

require "googleauth"
require_relative "#{Langchain.root}/langchain/llm/response/google_vertex_ai_response"

RSpec.describe Langchain::LLM::GoogleVertexAI do
let(:subject) { described_class.new(project_id: "proj", region: "us-central1") }

before do
allow(Google::Auth).to receive(:get_application_default).and_return(
double("Google::Auth::UserRefreshCredentials", fetch_access_token!: {access_token: 123})
)
end

describe "#generate_image" do
let(:prompt) { "A cartoon cat" }
let(:model) { "imagen-3.0-generate-002" }
let(:uri) { URI("#{subject.url}#{model}:predict") }
let(:params) { {instances: [{prompt: prompt}], parameters: {sampleCount: 1}} }
let(:api_response) { {"predictions" => [{"bytes" => "BASE64IMG"}]} }

before do
allow_any_instance_of(Net::HTTP).to receive(:request).and_return(double(body: api_response.to_json))
end

it "returns wrapper with base64s" do
resp = subject.generate_image(prompt: prompt)
expect(resp).to be_a(Langchain::LLM::Response::GoogleVertexAIResponse)
expect(resp.image_base64s).to eq(["BASE64IMG"])
end
end
end