118 changes: 118 additions & 0 deletions examples/generate_image.rb
@@ -0,0 +1,118 @@
# frozen_string_literal: true

# Example demonstrating Langchain's unified image generation API across providers
#
# Prerequisites (set any of these):
# export OPENAI_API_KEY="your_api_key"
# export GOOGLE_GEMINI_API_KEY="your_api_key"
# export GOOGLE_VERTEX_AI_PROJECT_ID="your_project_id"
#
# Run with:
# bundle exec ruby examples/generate_image.rb

require "bundler/inline"

# Declare dependencies inline so the example runs standalone;
# bundler/inline installs any gems that are missing locally
gemfile(true) do
source "https://rubygems.org"
gem "ruby-openai", ">= 6.3"
gem "googleauth" # For Google Vertex AI
gem "langchainrb", path: File.expand_path("..", __dir__)
end

require "langchainrb"
require "base64"

# Build array of available LLM providers based on environment variables
llms = []

# OpenAI
if ENV["OPENAI_API_KEY"]
llms << {
name: "OpenAI DALL-E 3",
instance: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"]),
options: {size: "1024x1024"}
}
end

# Google Gemini
if ENV["GOOGLE_GEMINI_API_KEY"]
llms << {
name: "Google Gemini",
instance: Langchain::LLM::GoogleGemini.new(api_key: ENV["GOOGLE_GEMINI_API_KEY"]),
options: {n: 1}
}
end

# Google Vertex AI
if ENV["GOOGLE_VERTEX_AI_PROJECT_ID"]
region = ENV.fetch("GOOGLE_VERTEX_AI_REGION", "us-central1")
llms << {
name: "Google Vertex AI (Imagen)",
instance: Langchain::LLM::GoogleVertexAI.new(
project_id: ENV["GOOGLE_VERTEX_AI_PROJECT_ID"],
region: region
),
options: {n: 1}
}
end

if llms.empty?
puts "No LLM providers configured. Please set at least one of:"
puts " - OPENAI_API_KEY"
puts " - GOOGLE_GEMINI_API_KEY"
puts " - GOOGLE_VERTEX_AI_PROJECT_ID"
exit 1
end

# Common prompt for all providers
PROMPT = "A minimalist illustration of a ruby gemstone on a dark background"

puts "Generating images with prompt: \"#{PROMPT}\""
puts "Using #{llms.length} provider(s)"
puts

# Demonstrate the unified API: the same method call works across all providers
llms.each do |llm_config|
puts "=== #{llm_config[:name]} ==="

begin
# Unified API call - works the same for all providers
response = llm_config[:instance].generate_image(
prompt: PROMPT,
**llm_config[:options]
)

# Handle different response formats
if response.respond_to?(:image_urls) && !response.image_urls.empty?
puts "✓ Generated #{response.image_urls.count} image(s)"
response.image_urls.each_with_index do |url, i|
puts " Image #{i + 1} URL: #{url}"
end
elsif response.respond_to?(:image_base64s) && !response.image_base64s.empty?
puts "✓ Generated #{response.image_base64s.count} image(s)"
response.image_base64s.each_with_index do |data, i|
# Sanitize the provider name (spaces, punctuation) into a safe filename
filename = "#{llm_config[:name].downcase.gsub(/[^a-z0-9]+/, "_").delete_suffix("_")}_image_#{i + 1}.png"
begin
decoded_data = Base64.decode64(data)
File.binwrite(filename, decoded_data)
puts " Image #{i + 1}: Saved to #{filename} (#{decoded_data.bytesize} bytes)"
rescue => e
puts " Image #{i + 1}: Base64 data received (#{data.length} chars) - error saving: #{e.message}"
end
end
else
puts "✗ No images in response"
end
rescue => e
puts "✗ Error: #{e.message}"
end

puts
end

puts "Summary:"
puts "- All providers use the same `generate_image` method"
puts "- Responses provide either `image_urls` or `image_base64s`"
puts "- This unified API makes it easy to switch between providers"
9 changes: 9 additions & 0 deletions lib/langchain/llm/base.rb
Original file line number Diff line number Diff line change
@@ -77,6 +77,15 @@ def summarize(...)
raise NotImplementedError, "#{self.class.name} does not support summarization"
end

#
# Generate an image for a given prompt. Parameters will depend on the LLM provider.
#
# @raise NotImplementedError if not supported by the LLM
#
def generate_image(...)
raise NotImplementedError, "#{self.class.name} does not support image generation"
end

#
# Returns an instance of Langchain::LLM::Parameters::Chat
#
25 changes: 25 additions & 0 deletions lib/langchain/llm/google_gemini.rb
@@ -7,6 +7,7 @@ class GoogleGemini < Base
DEFAULTS = {
chat_model: "gemini-1.5-pro-latest",
embedding_model: "text-embedding-004",
image_generation_model: "gemini-2.0-flash-preview-image-generation",
temperature: 0.0
}

@@ -91,6 +92,30 @@ def embed(
Langchain::LLM::Response::GoogleGeminiResponse.new(parsed_response, model: model)
end

# Generate an image for a given prompt using Gemini Image Generation capability
#
# @param prompt [String] The textual prompt for the desired image
# @param n [Integer] Number of images to generate (candidateCount) (default 1)
# @return [Langchain::LLM::Response::GoogleGeminiResponse] Response wrapper
def generate_image(prompt:, n: 1)
raise ArgumentError, "prompt argument is required" if prompt.to_s.strip.empty?

parameters = {
contents: [{parts: [{text: prompt}]}],
generationConfig: {
responseModalities: ["TEXT", "IMAGE"],
candidateCount: n
}
}

model = @defaults[:image_generation_model]
uri = URI("https://generativelanguage.googleapis.com/v1beta/models/#{model}:generateContent?key=#{api_key}")

parsed_response = http_post(uri, parameters)

Langchain::LLM::Response::GoogleGeminiResponse.new(parsed_response, model: model)
end

private

def http_post(url, params)
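For reviewers, a minimal usage sketch of the new Gemini path (hypothetical prompt; assumes a valid GOOGLE_GEMINI_API_KEY and the default model above):

require "langchainrb"
require "base64"

llm = Langchain::LLM::GoogleGemini.new(api_key: ENV["GOOGLE_GEMINI_API_KEY"])
response = llm.generate_image(prompt: "A minimalist ruby gemstone", n: 1)

# image_base64s pulls the base64 payloads out of the inlineData parts
response.image_base64s.each_with_index do |b64, i|
  File.binwrite("gemini_image_#{i + 1}.png", Base64.decode64(b64))
end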
30 changes: 29 additions & 1 deletion lib/langchain/llm/google_vertexai.rb
@@ -18,7 +18,8 @@ class GoogleVertexAI < Base
top_k: 40,
dimensions: 768,
embedding_model: "textembedding-gecko",
chat_model: "gemini-1.0-pro"
chat_model: "gemini-1.0-pro",
image_generation_model: "imagen-3.0-generate-002"
}.freeze

# Google Cloud has a project id and a specific region of deployment.
@@ -99,6 +100,33 @@ def chat(params = {})
end
end

# Generate images with Imagen model via Vertex AI
#
# @param prompt [String] The text prompt for the image
# @param n [Integer] Number of images to generate (1-4)
# @return [Langchain::LLM::Response::GoogleVertexAIResponse]
def generate_image(prompt:, n: 1)
raise ArgumentError, "prompt argument is required" if prompt.to_s.strip.empty?

params = {
instances: [
{
prompt: prompt
}
],
parameters: {
sampleCount: n
}
}

model = @defaults[:image_generation_model]
uri = URI("#{url}#{model}:predict")

parsed_response = http_post(uri, params)

Langchain::LLM::Response::GoogleVertexAIResponse.new(parsed_response, model: model)
end

private

def http_post(url, params)
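The same call against Vertex AI, sketched under the assumption that application-default credentials are configured; Imagen's sampleCount accepts 1-4, so n maps directly onto that range:

llm = Langchain::LLM::GoogleVertexAI.new(
  project_id: ENV["GOOGLE_VERTEX_AI_PROJECT_ID"],
  region: "us-central1"
)

# Request two images in one call; sampleCount is set from n
response = llm.generate_image(prompt: "A cartoon cat", n: 2)
response.image_base64s # => up to two base64-encoded image strings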
26 changes: 25 additions & 1 deletion lib/langchain/llm/openai.rb
@@ -18,7 +18,8 @@ class OpenAI < Base
DEFAULTS = {
n: 1,
chat_model: "gpt-4o-mini",
embedding_model: "text-embedding-3-small"
embedding_model: "text-embedding-3-small",
image_generation_model: "dall-e-3"
}.freeze

EMBEDDING_SIZES = {
@@ -161,6 +162,29 @@ def summarize(text:)
complete(prompt: prompt)
end

# Generate images for a given prompt using OpenAI Images API
#
# @param prompt [String] Textual prompt describing the desired image
# @param n [Integer] Number of images to generate (default 1)
# @param size [String] Requested resolution, e.g. "1024x1024" (default "1024x1024")
# @return [Langchain::LLM::Response::OpenAIResponse] Wrapper around the raw response
def generate_image(prompt:, n: 1, size: "1024x1024")
raise ArgumentError, "prompt argument is required" if prompt.to_s.strip.empty?

parameters = {
prompt: prompt,
n: n,
size: size,
model: @defaults[:image_generation_model]
}

response = with_api_error_handling do
client.images.generate(parameters: parameters)
end

Langchain::LLM::Response::OpenAIResponse.new(response)
end

def default_dimensions
@defaults[:dimensions] || EMBEDDING_SIZES.fetch(defaults[:embedding_model])
end
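And the OpenAI path, which (unlike the two Google providers) returns hosted URLs rather than inline base64 data; assumes OPENAI_API_KEY is set:

llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
response = llm.generate_image(prompt: "A minimalist ruby gemstone", size: "1024x1024")

# The Images API response carries URLs under the "data" key
response.image_urls.each { |url| puts url }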
11 changes: 11 additions & 0 deletions lib/langchain/llm/response/google_gemini_response.rb
@@ -45,5 +45,16 @@ def completion_tokens
def total_tokens
raw_response.dig("usageMetadata", "totalTokenCount")
end

# Returns array of base64 image data from inline_data parts
def image_base64s
candidates = raw_response["candidates"] || []
candidates.flat_map do |candidate|
parts = candidate.dig("content", "parts") || []
parts.filter_map { |part| part.dig("inlineData", "data") }
end
end

alias_method :image_blobs, :image_base64s
end
end
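For context, this is the response shape image_base64s walks; the inlineData/mimeType field names follow the Gemini REST casing, and the values here are placeholders:

{
  "candidates" => [
    {"content" => {"parts" => [
      {"text" => "optional accompanying text"},
      {"inlineData" => {"mimeType" => "image/png", "data" => "<base64>"}}
    ]}}
  ]
}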
23 changes: 23 additions & 0 deletions lib/langchain/llm/response/google_vertex_ai_response.rb
@@ -0,0 +1,23 @@
# frozen_string_literal: true

module Langchain::LLM::Response
class GoogleVertexAIResponse < BaseResponse
# Imagen responses place image bytes in the predictions list.
# Each prediction may carry the payload under "bytesBase64Encoded" (current Imagen responses), a bare "bytes" key, or nested image keys.
def image_base64s
Array(raw_response["predictions"]).map do |pred|
pred["bytesBase64Encoded"] || pred["bytes"] || pred.dig("image", "image_bytes") || pred.dig("image", "imageBytes")
end.compact
end

alias_method :image_blobs, :image_base64s

# Other methods not supported for image response
def chat_completion; nil; end
def embedding; nil; end
def embeddings; []; end
def prompt_tokens; nil; end
def completion_tokens; nil; end
def total_tokens; nil; end
end
end
7 changes: 7 additions & 0 deletions lib/langchain/llm/response/openai_response.rb
@@ -59,5 +59,12 @@ def completion_tokens
def total_tokens
raw_response.dig("usage", "total_tokens")
end

# Returns an array of image URLs when the response comes from the Image Generation endpoint
#
# @return [Array<String>] list of image URLs or [] if not present
def image_urls
Array(raw_response["data"]).filter_map { |d| d["url"] }
end
end
end
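The corresponding Images API payload that image_urls parses, with placeholder values:

{
  "created" => 1700000000,
  "data" => [
    {"url" => "https://example.com/generated-image.png"}
  ]
}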
6 changes: 6 additions & 0 deletions spec/lib/langchain/llm/base_spec.rb
@@ -48,6 +48,12 @@ def initialize
end
end

describe "#generate_image" do
it "raises an error" do
expect { subject.generate_image }.to raise_error(NotImplementedError)
end
end

describe "#chat_parameters(params = {})" do
subject { TestLLM.new }

34 changes: 34 additions & 0 deletions spec/lib/langchain/llm/google_gemini_image_spec.rb
@@ -0,0 +1,34 @@
# frozen_string_literal: true

RSpec.describe Langchain::LLM::GoogleGemini do
let(:subject) { described_class.new(api_key: "XYZ") }

describe "#generate_image" do
let(:prompt) { "Generate a minimalistic landscape" }
let(:model_id) { "gemini-2.0-flash-preview-image-generation" }
let(:uri) { URI("https://generativelanguage.googleapis.com/v1beta/models/#{model_id}:generateContent?key=XYZ") }
let(:params) do
{
contents: [{parts: [{text: prompt}]}],
generationConfig: {responseModalities: ["TEXT", "IMAGE"], candidateCount: 1}
}
end
let(:api_response) do
{"candidates" => [{"content" => {"parts" => [{"inline_data" => {"data" => "BASE64STRING"}}]}}]}
end

before do
http_response = double("response", body: api_response.to_json)
http = double("http")
allow(http).to receive(:use_ssl=)
allow(http).to receive(:set_debug_output)
allow(http).to receive(:request).and_return(http_response)
allow(Net::HTTP).to receive(:new).and_return(http)
end

it "returns a response wrapper" do
resp = subject.generate_image(prompt: prompt)
expect(resp).to be_a(Langchain::LLM::Response::GoogleGeminiResponse)
end
end
end
32 changes: 32 additions & 0 deletions spec/lib/langchain/llm/google_vertexai_image_spec.rb
@@ -0,0 +1,32 @@
# frozen_string_literal: true

require "googleauth"
require_relative "#{Langchain.root}/langchain/llm/response/google_vertex_ai_response"

RSpec.describe Langchain::LLM::GoogleVertexAI do
let(:subject) { described_class.new(project_id: "proj", region: "us-central1") }

before do
allow(Google::Auth).to receive(:get_application_default).and_return(
double("Google::Auth::UserRefreshCredentials", fetch_access_token!: {access_token: 123})
)
end

describe "#generate_image" do
let(:prompt) { "A cartoon cat" }
let(:model) { "imagen-3.0-generate-002" }
let(:uri) { URI("#{subject.url}#{model}:predict") }
let(:params) { {instances: [{prompt: prompt}], parameters: {sampleCount: 1}} }
let(:api_response) { {"predictions" => [{"bytes" => "BASE64IMG"}]} }

before do
allow_any_instance_of(Net::HTTP).to receive(:request).and_return(double(body: api_response.to_json))
end

it "returns wrapper with base64s" do
resp = subject.generate_image(prompt: prompt)
expect(resp).to be_a(Langchain::LLM::Response::GoogleVertexAIResponse)
expect(resp.image_base64s).to eq(["BASE64IMG"])
end
end
end