1 change: 1 addition & 0 deletions .env.template
@@ -4,6 +4,7 @@ HELICONE_CONTROL_PLANE_API_KEY="sk-helicone-..."
OPENAI_API_KEY="sk-proj-"
ANTHROPIC_API_KEY="sk-..."
GEMINI_API_KEY="fjalksj"
IONET_API_KEY="io-v2-..."
AWS_ACCESS_KEY="asbbj340j"
AWS_SECRET_KEY="kfjd09431"

21 changes: 21 additions & 0 deletions ai-gateway/config/embedded/providers.yaml
@@ -153,3 +153,24 @@ bedrock:
- "meta.llama3-2-90b-instruct-v1:0"
- "meta.llama4-maverick-17b-instruct-v1:0"
base-url: https://bedrock-runtime.us-east-1.amazonaws.com/

ionet:
models:
- "deepseek-ai/DeepSeek-R1-0528"
- "Intel/Qwen3-Coder-480B-A35B-Instruct-int4-mixed-ar"
- "meta-llama/Llama-3.2-90B-Vision-Instruct"
- "meta-llama/Llama-3.3-70B-Instruct"
- "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
- "mistralai/Devstral-Small-2505"
- "mistralai/Magistral-Small-2506"
- "mistralai/Mistral-Large-Instruct-2411"
- "mistralai/Mistral-Nemo-Instruct-2407"
- "moonshotai/Kimi-K2-Instruct-0905"
- "moonshotai/Kimi-K2-Thinking"
- "openai/gpt-oss-120b"
- "openai/gpt-oss-20b"
- "Qwen/Qwen2.5-VL-32B-Instruct"
- "Qwen/Qwen3-235B-A22B-Thinking-2507"
- "Qwen/Qwen3-Next-80B-A3B-Instruct"
- "zai-org/GLM-4.6"
base-url: https://api.intelligence.io.solutions/api/
48 changes: 48 additions & 0 deletions ai-gateway/config/ionet-test.yaml
@@ -0,0 +1,48 @@
# IO.NET Intelligence Provider Test Configuration
# This configuration file is for testing the IO.NET integration with the AI Gateway

# Helicone features (set to 'none' for testing without auth)
helicone:
  features: none

# Cache configuration (optional - in-memory for testing)
cache-store:
  type: in-memory

# Rate limit store (required for rate limiting)
rate-limit-store:
  type: in-memory

# Router configuration
routers:
  ionet-test:
    # Load balancing configuration for IO.NET models
    load-balance:
      chat:
        strategy: model-latency
        models:
          # High-capacity reasoning models
          - ionet/deepseek-ai/DeepSeek-R1-0528
          - ionet/meta-llama/Llama-3.3-70B-Instruct
          - ionet/moonshotai/Kimi-K2-Thinking

          # Multimodal models
          - ionet/meta-llama/Llama-3.2-90B-Vision-Instruct

          # Efficient models for general use
          - ionet/openai/gpt-oss-20b
          - ionet/mistralai/Mistral-Nemo-Instruct-2407

    # Optional: Enable caching for cost savings
    cache:
      directive: "max-age=3600" # Cache for 1 hour

# Global settings (optional)
global:
  cache:
    directive: "max-age=1800"

# Logging configuration (optional)
# telemetry:
#   exporter: otlp
#   level: info
#   endpoint: http://localhost:4317
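For a quick end-to-end check of this file, a request against the `ionet-test` router looks roughly like the sketch below. This is a sketch only, not part of the PR: the gateway listen address (`localhost:8080`) is an assumption, and it presumes the gateway was started with this config and a valid `IONET_API_KEY`.

// Sketch only: smoke-testing the `ionet-test` router defined above.
// Assumes the gateway is listening on localhost:8080 (adjust as needed).
use serde_json::json;

#[tokio::main]
async fn main() -> Result<(), reqwest::Error> {
    let response = reqwest::Client::new()
        .post("http://localhost:8080/router/ionet-test/chat/completions")
        // The `ionet/` prefix selects the IO.NET provider from the pool above.
        .json(&json!({
            "model": "ionet/meta-llama/Llama-3.3-70B-Instruct",
            "messages": [{ "role": "user", "content": "Hello, world!" }]
        }))
        .send()
        .await?;
    println!("status: {}", response.status());
    Ok(())
}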
14 changes: 14 additions & 0 deletions ai-gateway/src/config/balance.rs
@@ -116,6 +116,20 @@ impl BalanceConfig {
        )]))
    }

    #[cfg(any(test, feature = "testing"))]
    #[must_use]
    pub fn ionet_chat() -> Self {
        Self(HashMap::from([(
            EndpointType::Chat,
            BalanceConfigInner::ProviderWeighted {
                providers: nes![WeightedProvider {
                    provider: InferenceProvider::Named("ionet".into()),
                    weight: Decimal::from(1),
                }],
            },
        )]))
    }

    #[must_use]
    pub fn providers(&self) -> IndexSet<InferenceProvider> {
        self.0
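As a usage sketch, this test-only helper drops straight into a router config, exactly as the new `ionet` test in `tests/single_provider.rs` (further down in this diff) does:

// Sketch, mirroring the test below: a router whose chat traffic is
// load-balanced over the single `ionet` provider with weight 1.
let routers = RouterConfigs::new(HashMap::from([(
    RouterId::Named(CompactString::new("my-router")),
    RouterConfig {
        load_balance: BalanceConfig::ionet_chat(),
        ..Default::default()
    },
)]));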
17 changes: 17 additions & 0 deletions ai-gateway/src/middleware/mapper/registry.rs
@@ -231,6 +231,23 @@ impl EndpointConverterRegistryInner {
        ));
        registry.register_converter(key, converter);

        let key = RegistryKey::new(
            ApiEndpoint::OpenAI(OpenAI::chat_completions()),
            ApiEndpoint::OpenAICompatible {
                provider: InferenceProvider::Named("ionet".into()),
                openai_endpoint: OpenAI::chat_completions(),
            },
        );
        let converter = TypedEndpointConverter::<
            endpoints::openai::ChatCompletions,
            endpoints::openai::OpenAICompatibleChatCompletions,
            OpenAICompatibleConverter,
        >::new(OpenAICompatibleConverter::new(
            InferenceProvider::Named("ionet".into()),
            model_mapper.clone(),
        ));
        registry.register_converter(key, converter);

        registry
    }

42 changes: 42 additions & 0 deletions ai-gateway/src/tests/mock.rs
@@ -30,6 +30,8 @@ pub struct MockArgs {
    pub global_bedrock_latency: Option<u64>,
    #[builder(setter(strip_option), default = None)]
    pub global_mistral_latency: Option<u64>,
    #[builder(setter(strip_option), default = None)]
    pub global_ionet_latency: Option<u64>,

    #[builder(setter(strip_option), default = None)]
    pub openai_port: Option<u16>,
@@ -47,6 +49,8 @@ pub struct MockArgs {
    pub jawn_port: Option<u16>,
    #[builder(setter(strip_option), default = None)]
    pub mistral_port: Option<u16>,
    #[builder(setter(strip_option), default = None)]
    pub ionet_port: Option<u16>,

    /// Map of stub id to the expectations on the number of times it should be
    /// called.
@@ -65,6 +69,7 @@ pub struct Mock {
    pub minio_mock: Stubr,
    pub jawn_mock: Stubr,
    pub mistral_mock: Stubr,
    pub ionet_mock: Stubr,
    args: MockArgs,
}

@@ -157,6 +162,20 @@ impl Mock {
            .unwrap()
            .base_url = Url::parse(&mistral_mock.uri()).unwrap();

        let ionet_mock = start_mock_for_test(
            &get_stubs_path("ionet"),
            args.global_ionet_latency,
            args.stubs.as_ref(),
            args.verify,
            args.ionet_port,
        )
        .await;
        config
            .providers
            .get_mut(&InferenceProvider::Named("ionet".into()))
            .unwrap()
            .base_url = Url::parse(&ionet_mock.uri()).unwrap();

        let minio_mock = start_mock_for_test(
            &get_stubs_path("minio"),
            None,
@@ -189,6 +208,7 @@ impl Mock {
            minio_mock,
            jawn_mock,
            mistral_mock,
            ionet_mock,
            args,
        }
    }
@@ -273,6 +293,16 @@ impl Mock {
        )
        .await;

        let ionet_mock = start_mock(
            &get_stubs_path("ionet"),
            None,
            args.stubs.as_ref(),
            false,
            false,
            args.ionet_port,
        )
        .await;

        Self {
            openai_mock,
            anthropic_mock,
@@ -282,6 +312,7 @@ impl Mock {
            minio_mock,
            jawn_mock,
            mistral_mock,
            ionet_mock,
            args,
        }
    }
@@ -295,6 +326,7 @@ impl Mock {
        self.minio_mock.http_server.verify().await;
        self.jawn_mock.http_server.verify().await;
        self.mistral_mock.http_server.verify().await;
        self.ionet_mock.http_server.verify().await;
    }

    pub async fn reset(&self) {
@@ -306,6 +338,7 @@ impl Mock {
        self.minio_mock.http_server.reset().await;
        self.jawn_mock.http_server.reset().await;
        self.mistral_mock.http_server.reset().await;
        self.ionet_mock.http_server.reset().await;
    }

    pub async fn stubs(&self, stubs: HashMap<&'static str, Times>) {
@@ -372,6 +405,15 @@ impl Mock {
        )
        .await;

        register_stubs_for_mock(
            &self.ionet_mock,
            &get_stubs_path("ionet"),
            self.args.global_ionet_latency,
            &stubs,
            self.args.verify,
        )
        .await;

        handle_presigned_url_mock(
            &self.jawn_mock,
            &self.minio_mock,
3 changes: 3 additions & 0 deletions ai-gateway/src/types/provider.rs
@@ -138,6 +138,9 @@ impl InferenceProvider {
"Hyperbolic" => Ok(InferenceProvider::Named("hyperbolic".into())),
"Deepseek" => Ok(InferenceProvider::Named("deepseek".into())),
"X.AI (Grok)" => Ok(InferenceProvider::Named("xai".into())),
"IO.NET Intelligence" => {
Ok(InferenceProvider::Named("ionet".into()))
}
_ => Err(ProviderError::InvalidProviderName(provider_name.into())),
}
}
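Illustratively, the new match arm lets the human-readable provider label resolve to the internal `ionet` id. The hunk doesn't show the enclosing function's signature, so the name below is a placeholder assumption:

// `from_provider_name` is a placeholder; the real function name is not
// visible in this hunk.
let provider = InferenceProvider::from_provider_name("IO.NET Intelligence")?;
assert_eq!(provider, InferenceProvider::Named("ionet".into()));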
42 changes: 42 additions & 0 deletions ai-gateway/stubs/ionet/chat_completion.json
@@ -0,0 +1,42 @@
{
  "id": "success:ionet:chat_completion",
  "request": {
    "method": "POST",
    "url": "/v1/chat/completions"
  },
  "response": {
    "status": 200,
    "headers": {
      "Content-Type": "application/json"
    },
    "jsonBody": {
      "id": "chatcmpl-ionet-12345",
      "object": "chat.completion",
      "created": 1741569952,
      "model": "meta-llama/Llama-3.3-70B-Instruct",
      "choices": [
        {
          "index": 0,
          "message": {
            "role": "assistant",
            "content": "Hello! I'm running on IO.NET's decentralized GPU infrastructure. How can I assist you today?",
            "refusal": null
          },
          "logprobs": null,
          "finish_reason": "stop"
        }
      ],
      "usage": {
        "prompt_tokens": 15,
        "completion_tokens": 25,
        "total_tokens": 40,
        "prompt_tokens_details": {
          "cached_tokens": 0
        },
        "completion_tokens_details": {
          "reasoning_tokens": 0
        }
      }
    }
  }
}
51 changes: 51 additions & 0 deletions ai-gateway/tests/single_provider.rs
@@ -347,3 +347,54 @@ async fn mistral() {
    let response = harness.call(request).await.unwrap();
    assert_eq!(response.status(), StatusCode::OK);
}

/// Sending a request to https://localhost/router should
/// result in the proxied request targeting the IO.NET Intelligence API.
#[tokio::test]
#[serial_test::serial(default_mock)]
async fn ionet() {
    let mut config = Config::test_default();
    // Disable auth for this test since we're testing basic provider
    // functionality
    config.helicone.features = HeliconeFeatures::None;
    let router_config = RouterConfigs::new(HashMap::from([(
        RouterId::Named(CompactString::new("my-router")),
        RouterConfig {
            load_balance: BalanceConfig::ionet_chat(),
            ..Default::default()
        },
    )]));
    config.routers = router_config;
    let mock_args = MockArgs::builder()
        .stubs(HashMap::from([
            ("success:ionet:chat_completion", 1.into()),
            ("success:minio:upload_request", 0.into()),
            ("success:jawn:log_request", 0.into()),
        ]))
        .build();
    let mut harness = Harness::builder()
        .with_config(config)
        .with_mock_args(mock_args)
        .build()
        .await;
    let request_body = axum_core::body::Body::from(
        serde_json::to_vec(&json!({
            "model": "ionet/meta-llama/Llama-3.3-70B-Instruct",
            "messages": [
                {
                    "role": "user",
                    "content": "Hello, world!"
                }
            ]
        }))
        .unwrap(),
    );
    let request = Request::builder()
        .method(Method::POST)
        // default router
        .uri("http://router.helicone.com/router/my-router/chat/completions")
        .body(request_body)
        .unwrap();
    let response = harness.call(request).await.unwrap();
    assert_eq!(response.status(), StatusCode::OK);
}