1 change: 1 addition & 0 deletions .env.template
@@ -4,6 +4,7 @@ HELICONE_CONTROL_PLANE_API_KEY="sk-helicone-..."
OPENAI_API_KEY="sk-proj-"
ANTHROPIC_API_KEY="sk-..."
GEMINI_API_KEY="fjalksj"
IONET_API_KEY="io-v2-..."
AWS_ACCESS_KEY="asbbj340j"
AWS_SECRET_KEY="kfjd09431"

21 changes: 21 additions & 0 deletions ai-gateway/config/embedded/providers.yaml
@@ -153,3 +153,24 @@ bedrock:
- "meta.llama3-2-90b-instruct-v1:0"
- "meta.llama4-maverick-17b-instruct-v1:0"
base-url: https://bedrock-runtime.us-east-1.amazonaws.com/

ionet:
models:
- "deepseek-ai/DeepSeek-R1-0528"
- "Intel/Qwen3-Coder-480B-A35B-Instruct-int4-mixed-ar"
- "meta-llama/Llama-3.2-90B-Vision-Instruct"
- "meta-llama/Llama-3.3-70B-Instruct"
- "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
- "mistralai/Devstral-Small-2505"
- "mistralai/Magistral-Small-2506"
- "mistralai/Mistral-Large-Instruct-2411"
- "mistralai/Mistral-Nemo-Instruct-2407"
- "moonshotai/Kimi-K2-Instruct-0905"
- "moonshotai/Kimi-K2-Thinking"
- "openai/gpt-oss-120b"
- "openai/gpt-oss-20b"
- "Qwen/Qwen2.5-VL-32B-Instruct"
- "Qwen/Qwen3-235B-A22B-Thinking-2507"
- "Qwen/Qwen3-Next-80B-A3B-Instruct"
- "zai-org/GLM-4.6"
base-url: https://api.intelligence.io.solutions/api/
48 changes: 48 additions & 0 deletions ai-gateway/config/ionet-test.yaml
@@ -0,0 +1,48 @@
# IO.NET Intelligence Provider Test Configuration
# This configuration file is for testing the IO.NET integration with the AI Gateway

# Helicone features (set to 'none' for testing without auth)
helicone:
  features: none

# Cache configuration (optional - in-memory for testing)
cache-store:
  type: in-memory

# Rate limit store (required for rate limiting)
rate-limit-store:
  type: in-memory

# Router configuration
routers:
  ionet-test:
    # Load balancing configuration for IO.NET models
    load-balance:
      chat:
        strategy: model-latency
        models:
          # High-capacity reasoning models
          - ionet/deepseek-ai/DeepSeek-R1-0528
          - ionet/meta-llama/Llama-3.3-70B-Instruct
          - ionet/moonshotai/Kimi-K2-Thinking

          # Multimodal models
          - ionet/meta-llama/Llama-3.2-90B-Vision-Instruct

          # Efficient models for general use
          - ionet/openai/gpt-oss-20b
          - ionet/mistralai/Mistral-Nemo-Instruct-2407

    # Optional: Enable caching for cost savings
    cache:
      directive: "max-age=3600" # Cache for 1 hour

# Global settings (optional)
global:
  cache:
    directive: "max-age=1800"

# Logging configuration (optional)
# telemetry:
#   exporter: otlp
#   level: info
#   endpoint: http://localhost:4317
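For a quick end-to-end check of this file, a request against the `ionet-test` router looks roughly like the sketch below. This is a sketch only, not part of the PR: the gateway listen address (`localhost:8080`) is an assumption, and it presumes the gateway was started with this config and a valid `IONET_API_KEY`.

// Sketch only: smoke-testing the `ionet-test` router defined above.
// Assumes the gateway is listening on localhost:8080 (adjust as needed).
use serde_json::json;

#[tokio::main]
async fn main() -> Result<(), reqwest::Error> {
    let response = reqwest::Client::new()
        .post("http://localhost:8080/router/ionet-test/chat/completions")
        // The `ionet/` prefix selects the IO.NET provider from the pool above.
        .json(&json!({
            "model": "ionet/meta-llama/Llama-3.3-70B-Instruct",
            "messages": [{ "role": "user", "content": "Hello, world!" }]
        }))
        .send()
        .await?;
    println!("status: {}", response.status());
    Ok(())
}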
14 changes: 14 additions & 0 deletions ai-gateway/src/config/balance.rs
@@ -116,6 +116,20 @@ impl BalanceConfig {
        )]))
    }

    #[cfg(any(test, feature = "testing"))]
    #[must_use]
    pub fn ionet_chat() -> Self {
        Self(HashMap::from([(
            EndpointType::Chat,
            BalanceConfigInner::ProviderWeighted {
                providers: nes![WeightedProvider {
                    provider: InferenceProvider::Named("ionet".into()),
                    weight: Decimal::from(1),
                }],
            },
        )]))
    }

    #[must_use]
    pub fn providers(&self) -> IndexSet<InferenceProvider> {
        self.0
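As a usage sketch, this test-only helper drops straight into a router config, exactly as the new `ionet` test in `tests/single_provider.rs` (further down in this diff) does:

// Sketch, mirroring the test below: a router whose chat traffic is
// load-balanced over the single `ionet` provider with weight 1.
let routers = RouterConfigs::new(HashMap::from([(
    RouterId::Named(CompactString::new("my-router")),
    RouterConfig {
        load_balance: BalanceConfig::ionet_chat(),
        ..Default::default()
    },
)]));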
17 changes: 17 additions & 0 deletions ai-gateway/src/middleware/mapper/registry.rs
@@ -231,6 +231,23 @@ impl EndpointConverterRegistryInner {
        ));
        registry.register_converter(key, converter);

        let key = RegistryKey::new(
            ApiEndpoint::OpenAI(OpenAI::chat_completions()),
            ApiEndpoint::OpenAICompatible {
                provider: InferenceProvider::Named("ionet".into()),
                openai_endpoint: OpenAI::chat_completions(),
            },
        );
        let converter = TypedEndpointConverter::<
            endpoints::openai::ChatCompletions,
            endpoints::openai::OpenAICompatibleChatCompletions,
            OpenAICompatibleConverter,
        >::new(OpenAICompatibleConverter::new(
            InferenceProvider::Named("ionet".into()),
            model_mapper.clone(),
        ));
        registry.register_converter(key, converter);

        registry
    }

42 changes: 42 additions & 0 deletions ai-gateway/src/tests/mock.rs
@@ -30,6 +30,8 @@ pub struct MockArgs {
    pub global_bedrock_latency: Option<u64>,
    #[builder(setter(strip_option), default = None)]
    pub global_mistral_latency: Option<u64>,
    #[builder(setter(strip_option), default = None)]
    pub global_ionet_latency: Option<u64>,

    #[builder(setter(strip_option), default = None)]
    pub openai_port: Option<u16>,
@@ -47,6 +49,8 @@ pub struct MockArgs {
    pub jawn_port: Option<u16>,
    #[builder(setter(strip_option), default = None)]
    pub mistral_port: Option<u16>,
    #[builder(setter(strip_option), default = None)]
    pub ionet_port: Option<u16>,

    /// Map of stub id to the expectations on the number of times it should be
    /// called.
@@ -65,6 +69,7 @@ pub struct Mock {
    pub minio_mock: Stubr,
    pub jawn_mock: Stubr,
    pub mistral_mock: Stubr,
    pub ionet_mock: Stubr,
    args: MockArgs,
}

@@ -157,6 +162,20 @@ impl Mock {
            .unwrap()
            .base_url = Url::parse(&mistral_mock.uri()).unwrap();

        let ionet_mock = start_mock_for_test(
            &get_stubs_path("ionet"),
            args.global_ionet_latency,
            args.stubs.as_ref(),
            args.verify,
            args.ionet_port,
        )
        .await;
        config
            .providers
            .get_mut(&InferenceProvider::Named("ionet".into()))
            .unwrap()
            .base_url = Url::parse(&ionet_mock.uri()).unwrap();

        let minio_mock = start_mock_for_test(
            &get_stubs_path("minio"),
            None,
@@ -189,6 +208,7 @@ impl Mock {
            minio_mock,
            jawn_mock,
            mistral_mock,
            ionet_mock,
            args,
        }
    }
@@ -273,6 +293,16 @@ impl Mock {
        )
        .await;

        let ionet_mock = start_mock(
            &get_stubs_path("ionet"),
            None,
            args.stubs.as_ref(),
            false,
            false,
            args.ionet_port,
        )
        .await;

        Self {
            openai_mock,
            anthropic_mock,
@@ -282,6 +312,7 @@ impl Mock {
            minio_mock,
            jawn_mock,
            mistral_mock,
            ionet_mock,
            args,
        }
    }
@@ -295,6 +326,7 @@ impl Mock {
        self.minio_mock.http_server.verify().await;
        self.jawn_mock.http_server.verify().await;
        self.mistral_mock.http_server.verify().await;
        self.ionet_mock.http_server.verify().await;
    }

    pub async fn reset(&self) {
@@ -306,6 +338,7 @@ impl Mock {
        self.minio_mock.http_server.reset().await;
        self.jawn_mock.http_server.reset().await;
        self.mistral_mock.http_server.reset().await;
        self.ionet_mock.http_server.reset().await;
    }

    pub async fn stubs(&self, stubs: HashMap<&'static str, Times>) {
@@ -372,6 +405,15 @@ impl Mock {
        )
        .await;

        register_stubs_for_mock(
            &self.ionet_mock,
            &get_stubs_path("ionet"),
            self.args.global_ionet_latency,
            &stubs,
            self.args.verify,
        )
        .await;

        handle_presigned_url_mock(
            &self.jawn_mock,
            &self.minio_mock,
3 changes: 3 additions & 0 deletions ai-gateway/src/types/provider.rs
@@ -138,6 +138,9 @@ impl InferenceProvider {
"Hyperbolic" => Ok(InferenceProvider::Named("hyperbolic".into())),
"Deepseek" => Ok(InferenceProvider::Named("deepseek".into())),
"X.AI (Grok)" => Ok(InferenceProvider::Named("xai".into())),
"IO.NET Intelligence" => {
Ok(InferenceProvider::Named("ionet".into()))
}
_ => Err(ProviderError::InvalidProviderName(provider_name.into())),
}
}
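Illustratively, the new match arm lets the human-readable provider label resolve to the internal `ionet` id. The hunk doesn't show the enclosing function's signature, so the name below is a placeholder assumption:

// `from_provider_name` is a placeholder; the real function name is not
// visible in this hunk.
let provider = InferenceProvider::from_provider_name("IO.NET Intelligence")?;
assert_eq!(provider, InferenceProvider::Named("ionet".into()));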
42 changes: 42 additions & 0 deletions ai-gateway/stubs/ionet/chat_completion.json
@@ -0,0 +1,42 @@
{
  "id": "success:ionet:chat_completion",
  "request": {
    "method": "POST",
    "url": "/v1/chat/completions"
  },
  "response": {
    "status": 200,
    "headers": {
      "Content-Type": "application/json"
    },
    "jsonBody": {
      "id": "chatcmpl-ionet-12345",
      "object": "chat.completion",
      "created": 1741569952,
      "model": "meta-llama/Llama-3.3-70B-Instruct",
      "choices": [
        {
          "index": 0,
          "message": {
            "role": "assistant",
            "content": "Hello! I'm running on IO.NET's decentralized GPU infrastructure. How can I assist you today?",
            "refusal": null
          },
          "logprobs": null,
          "finish_reason": "stop"
        }
      ],
      "usage": {
        "prompt_tokens": 15,
        "completion_tokens": 25,
        "total_tokens": 40,
        "prompt_tokens_details": {
          "cached_tokens": 0
        },
        "completion_tokens_details": {
          "reasoning_tokens": 0
        }
      }
    }
  }
}
51 changes: 51 additions & 0 deletions ai-gateway/tests/single_provider.rs
@@ -347,3 +347,54 @@ async fn mistral() {
    let response = harness.call(request).await.unwrap();
    assert_eq!(response.status(), StatusCode::OK);
}

/// Sending a request to https://localhost/router should
/// result in the proxied request targeting the IO.NET Intelligence API.
#[tokio::test]
#[serial_test::serial(default_mock)]
async fn ionet() {
    let mut config = Config::test_default();
    // Disable auth for this test since we're testing basic provider
    // functionality
    config.helicone.features = HeliconeFeatures::None;
    let router_config = RouterConfigs::new(HashMap::from([(
        RouterId::Named(CompactString::new("my-router")),
        RouterConfig {
            load_balance: BalanceConfig::ionet_chat(),
            ..Default::default()
        },
    )]));
    config.routers = router_config;
    let mock_args = MockArgs::builder()
        .stubs(HashMap::from([
            ("success:ionet:chat_completion", 1.into()),
            ("success:minio:upload_request", 0.into()),
            ("success:jawn:log_request", 0.into()),
        ]))
        .build();
    let mut harness = Harness::builder()
        .with_config(config)
        .with_mock_args(mock_args)
        .build()
        .await;
    let request_body = axum_core::body::Body::from(
        serde_json::to_vec(&json!({
            "model": "ionet/meta-llama/Llama-3.3-70B-Instruct",
            "messages": [
                {
                    "role": "user",
                    "content": "Hello, world!"
                }
            ]
        }))
        .unwrap(),
    );
    let request = Request::builder()
        .method(Method::POST)
        // default router
        .uri("http://router.helicone.com/router/my-router/chat/completions")
        .body(request_body)
        .unwrap();
    let response = harness.call(request).await.unwrap();
    assert_eq!(response.status(), StatusCode::OK);
}