Skip to content

Commit 9cb2d9c

Browse files
feat(rig-1192): support pdf, image (openrouter) (#1404)
* feat(rig-1192): support pdf, image (openrouter) * chore: fix merge conflict
1 parent 0977167 commit 9cb2d9c

File tree

2 files changed

+977
-10
lines changed

2 files changed

+977
-10
lines changed
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
//! Example demonstrating OpenRouter's multimodal support for images and PDFs.
2+
//!
3+
//! This example shows how to send images and PDF documents to models via OpenRouter's API.
4+
//!
5+
//! OpenRouter supports:
6+
//! - Images via URL or base64 data URI
7+
//! - PDF files via URL or base64 data URI
8+
//!
9+
//! To run this example, set your OpenRouter API key:
10+
//! ```bash
11+
//! export OPENROUTER_API_KEY=your_api_key
12+
//! cargo run --example openrouter_multimodal
13+
//! ```
14+
15+
use rig::OneOrMany;
16+
use rig::completion::Prompt;
17+
use rig::message::{
18+
Document, DocumentMediaType, DocumentSourceKind, Image, ImageMediaType, Message, UserContent,
19+
};
20+
use rig::prelude::*;
21+
use rig::providers::openrouter;
22+
23+
/// Model that supports vision (images)
24+
// Passed to `client.agent(...)` by all three examples below, including the PDF one.
const VISION_MODEL: &str = "google/gemini-2.5-flash";
25+
26+
#[tokio::main]
27+
async fn main() -> Result<(), anyhow::Error> {
28+
// Initialize tracing for debugging
29+
tracing_subscriber::fmt()
30+
.with_max_level(tracing::Level::INFO)
31+
.with_target(false)
32+
.init();
33+
34+
// Create OpenRouter client
35+
let client = openrouter::Client::from_env();
36+
37+
// Example 1: Analyze an image from URL
38+
println!("=== Example 1: Image Analysis (URL) ===\n");
39+
analyze_image_url(&client).await?;
40+
41+
// Example 2: Send a PDF document from URL
42+
println!("\n=== Example 2: PDF Analysis (URL) ===\n");
43+
analyze_pdf_url(&client).await?;
44+
45+
// Example 3: Mixed content - text with image
46+
println!("\n=== Example 3: Mixed Content ===\n");
47+
mixed_content(&client).await?;
48+
49+
Ok(())
50+
}
51+
52+
/// Example: Analyze an image from a URL
53+
async fn analyze_image_url(client: &openrouter::Client) -> Result<(), anyhow::Error> {
54+
let agent = client
55+
.agent(VISION_MODEL)
56+
.preamble("You are a helpful assistant that describes images in detail.")
57+
.build();
58+
59+
// Create an image from URL using Rig's standard message types
60+
let image_message = Message::User {
61+
content: OneOrMany::many(vec![
62+
UserContent::text("What do you see in this image? Describe it in detail."),
63+
UserContent::Image(Image {
64+
data: DocumentSourceKind::Url(
65+
"https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/800px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg".to_string()
66+
),
67+
media_type: Some(ImageMediaType::JPEG),
68+
detail: None,
69+
additional_params: None,
70+
}),
71+
])?,
72+
};
73+
74+
let response = agent.prompt(image_message).await?;
75+
println!("Response:\n{response}");
76+
77+
Ok(())
78+
}
79+
80+
/// Example: Analyze a PDF document from a URL
81+
async fn analyze_pdf_url(client: &openrouter::Client) -> Result<(), anyhow::Error> {
82+
let agent = client
83+
.agent(VISION_MODEL)
84+
.preamble("You are a helpful assistant that summarizes documents.")
85+
.build();
86+
87+
// Create a document from URL using Rig's standard message types
88+
// OpenRouter will automatically convert this to its file format
89+
let pdf_message = Message::User {
90+
content: OneOrMany::many(vec![
91+
UserContent::text("Please summarize the key points of this document."),
92+
UserContent::Document(Document {
93+
data: DocumentSourceKind::Url("https://bitcoin.org/bitcoin.pdf".to_string()),
94+
media_type: Some(DocumentMediaType::PDF),
95+
additional_params: None,
96+
}),
97+
])?,
98+
};
99+
100+
let response = agent.prompt(pdf_message).await?;
101+
println!("Response:\n{response}");
102+
103+
Ok(())
104+
}
105+
106+
/// Example: Mixed content with text and image
107+
async fn mixed_content(client: &openrouter::Client) -> Result<(), anyhow::Error> {
108+
let agent = client
109+
.agent(VISION_MODEL)
110+
.preamble("You are a helpful assistant.")
111+
.build();
112+
113+
// Multiple content items in a single message
114+
let message = Message::User {
115+
content: OneOrMany::many(vec![
116+
UserContent::text("I have two questions:"),
117+
UserContent::text("1. What colors do you see in this image?"),
118+
UserContent::Image(Image {
119+
data: DocumentSourceKind::Url(
120+
"https://upload.wikimedia.org/wikipedia/commons/thumb/4/47/PNG_transparency_demonstration_1.png/280px-PNG_transparency_demonstration_1.png".to_string()
121+
),
122+
media_type: Some(ImageMediaType::PNG),
123+
detail: None,
124+
additional_params: None,
125+
}),
126+
UserContent::text("2. What is the main subject?"),
127+
])?,
128+
};
129+
130+
let response = agent.prompt(message).await?;
131+
println!("Response:\n{response}");
132+
133+
Ok(())
134+
}

0 commit comments

Comments
 (0)