|
| 1 | +//! Example demonstrating OpenRouter's multimodal support for images and PDFs. |
| 2 | +//! |
| 3 | +//! This example shows how to send images and PDF documents to models via OpenRouter's API. |
| 4 | +//! |
| 5 | +//! OpenRouter supports: |
| 6 | +//! - Images via URL or base64 data URI |
| 7 | +//! - PDF files via URL or base64 data URI |
| 8 | +//! |
| 9 | +//! To run this example, set your OpenRouter API key: |
| 10 | +//! ```bash |
| 11 | +//! export OPENROUTER_API_KEY=your_api_key |
| 12 | +//! cargo run --example openrouter_multimodal |
| 13 | +//! ``` |
| 14 | +
|
| 15 | +use rig::OneOrMany; |
| 16 | +use rig::completion::Prompt; |
| 17 | +use rig::message::{ |
| 18 | + Document, DocumentMediaType, DocumentSourceKind, Image, ImageMediaType, Message, UserContent, |
| 19 | +}; |
| 20 | +use rig::prelude::*; |
| 21 | +use rig::providers::openrouter; |
| 22 | + |
/// OpenRouter model identifier shared by every example in this file,
/// for the image prompts as well as the PDF prompt.
const VISION_MODEL: &str = "google/gemini-2.5-flash";
| 25 | + |
| 26 | +#[tokio::main] |
| 27 | +async fn main() -> Result<(), anyhow::Error> { |
| 28 | + // Initialize tracing for debugging |
| 29 | + tracing_subscriber::fmt() |
| 30 | + .with_max_level(tracing::Level::INFO) |
| 31 | + .with_target(false) |
| 32 | + .init(); |
| 33 | + |
| 34 | + // Create OpenRouter client |
| 35 | + let client = openrouter::Client::from_env(); |
| 36 | + |
| 37 | + // Example 1: Analyze an image from URL |
| 38 | + println!("=== Example 1: Image Analysis (URL) ===\n"); |
| 39 | + analyze_image_url(&client).await?; |
| 40 | + |
| 41 | + // Example 2: Send a PDF document from URL |
| 42 | + println!("\n=== Example 2: PDF Analysis (URL) ===\n"); |
| 43 | + analyze_pdf_url(&client).await?; |
| 44 | + |
| 45 | + // Example 3: Mixed content - text with image |
| 46 | + println!("\n=== Example 3: Mixed Content ===\n"); |
| 47 | + mixed_content(&client).await?; |
| 48 | + |
| 49 | + Ok(()) |
| 50 | +} |
| 51 | + |
| 52 | +/// Example: Analyze an image from a URL |
| 53 | +async fn analyze_image_url(client: &openrouter::Client) -> Result<(), anyhow::Error> { |
| 54 | + let agent = client |
| 55 | + .agent(VISION_MODEL) |
| 56 | + .preamble("You are a helpful assistant that describes images in detail.") |
| 57 | + .build(); |
| 58 | + |
| 59 | + // Create an image from URL using Rig's standard message types |
| 60 | + let image_message = Message::User { |
| 61 | + content: OneOrMany::many(vec![ |
| 62 | + UserContent::text("What do you see in this image? Describe it in detail."), |
| 63 | + UserContent::Image(Image { |
| 64 | + data: DocumentSourceKind::Url( |
| 65 | + "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/800px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg".to_string() |
| 66 | + ), |
| 67 | + media_type: Some(ImageMediaType::JPEG), |
| 68 | + detail: None, |
| 69 | + additional_params: None, |
| 70 | + }), |
| 71 | + ])?, |
| 72 | + }; |
| 73 | + |
| 74 | + let response = agent.prompt(image_message).await?; |
| 75 | + println!("Response:\n{response}"); |
| 76 | + |
| 77 | + Ok(()) |
| 78 | +} |
| 79 | + |
| 80 | +/// Example: Analyze a PDF document from a URL |
| 81 | +async fn analyze_pdf_url(client: &openrouter::Client) -> Result<(), anyhow::Error> { |
| 82 | + let agent = client |
| 83 | + .agent(VISION_MODEL) |
| 84 | + .preamble("You are a helpful assistant that summarizes documents.") |
| 85 | + .build(); |
| 86 | + |
| 87 | + // Create a document from URL using Rig's standard message types |
| 88 | + // OpenRouter will automatically convert this to its file format |
| 89 | + let pdf_message = Message::User { |
| 90 | + content: OneOrMany::many(vec![ |
| 91 | + UserContent::text("Please summarize the key points of this document."), |
| 92 | + UserContent::Document(Document { |
| 93 | + data: DocumentSourceKind::Url("https://bitcoin.org/bitcoin.pdf".to_string()), |
| 94 | + media_type: Some(DocumentMediaType::PDF), |
| 95 | + additional_params: None, |
| 96 | + }), |
| 97 | + ])?, |
| 98 | + }; |
| 99 | + |
| 100 | + let response = agent.prompt(pdf_message).await?; |
| 101 | + println!("Response:\n{response}"); |
| 102 | + |
| 103 | + Ok(()) |
| 104 | +} |
| 105 | + |
| 106 | +/// Example: Mixed content with text and image |
| 107 | +async fn mixed_content(client: &openrouter::Client) -> Result<(), anyhow::Error> { |
| 108 | + let agent = client |
| 109 | + .agent(VISION_MODEL) |
| 110 | + .preamble("You are a helpful assistant.") |
| 111 | + .build(); |
| 112 | + |
| 113 | + // Multiple content items in a single message |
| 114 | + let message = Message::User { |
| 115 | + content: OneOrMany::many(vec![ |
| 116 | + UserContent::text("I have two questions:"), |
| 117 | + UserContent::text("1. What colors do you see in this image?"), |
| 118 | + UserContent::Image(Image { |
| 119 | + data: DocumentSourceKind::Url( |
| 120 | + "https://upload.wikimedia.org/wikipedia/commons/thumb/4/47/PNG_transparency_demonstration_1.png/280px-PNG_transparency_demonstration_1.png".to_string() |
| 121 | + ), |
| 122 | + media_type: Some(ImageMediaType::PNG), |
| 123 | + detail: None, |
| 124 | + additional_params: None, |
| 125 | + }), |
| 126 | + UserContent::text("2. What is the main subject?"), |
| 127 | + ])?, |
| 128 | + }; |
| 129 | + |
| 130 | + let response = agent.prompt(message).await?; |
| 131 | + println!("Response:\n{response}"); |
| 132 | + |
| 133 | + Ok(()) |
| 134 | +} |
0 commit comments