Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/semantic-router/cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ import (
"os"

"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/vllm-project/semantic-router/semantic-router/pkg/api"
"github.com/vllm-project/semantic-router/semantic-router/pkg/extproc"
"github.com/vllm-project/semantic-router/semantic-router/pkg/observability"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/api"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/extproc"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability"
)

func main() {
Expand Down
6 changes: 3 additions & 3 deletions src/semantic-router/go.mod
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
module github.com/vllm-project/semantic-router/semantic-router
module github.com/vllm-project/semantic-router/src/semantic-router

go 1.24.1

replace (
github.com/vllm-project/semantic-router/candle-binding => ../../candle-binding
github.com/vllm-project/semantic-router/semantic-router/pkg/config => ./pkg/config
github.com/vllm-project/semantic-router/semantic-router/pkg/extproc => ./pkg/extproc
github.com/vllm-project/semantic-router/src/semantic-router/pkg/config => ./pkg/config
github.com/vllm-project/semantic-router/src/semantic-router/pkg/extproc => ./pkg/extproc
)

require (
Expand Down
6 changes: 3 additions & 3 deletions src/semantic-router/pkg/api/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ import (
"sync"
"time"

"github.com/vllm-project/semantic-router/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/semantic-router/pkg/metrics"
"github.com/vllm-project/semantic-router/semantic-router/pkg/services"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/metrics"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/services"
)

// ClassificationAPIServer holds the server state and dependencies
Expand Down
4 changes: 2 additions & 2 deletions src/semantic-router/pkg/api/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ import (
"net/http/httptest"
"testing"

"github.com/vllm-project/semantic-router/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/semantic-router/pkg/services"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/services"
)

func TestHandleBatchClassification(t *testing.T) {
Expand Down
2 changes: 1 addition & 1 deletion src/semantic-router/pkg/cache/cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (
. "github.com/onsi/gomega"

candle "github.com/vllm-project/semantic-router/candle-binding"
"github.com/vllm-project/semantic-router/semantic-router/pkg/cache"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/cache"
)

func TestCache(t *testing.T) {
Expand Down
2 changes: 1 addition & 1 deletion src/semantic-router/pkg/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import (

"gopkg.in/yaml.v3"

"github.com/vllm-project/semantic-router/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
Expand Down
2 changes: 1 addition & 1 deletion src/semantic-router/pkg/config/parse_configfile_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"

"github.com/vllm-project/semantic-router/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
)

var _ = Describe("ParseConfigFile and ReplaceGlobalConfig", func() {
Expand Down
6 changes: 3 additions & 3 deletions src/semantic-router/pkg/extproc/caching_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ import (

ext_proc "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"

"github.com/vllm-project/semantic-router/semantic-router/pkg/cache"
"github.com/vllm-project/semantic-router/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/semantic-router/pkg/extproc"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/cache"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/extproc"
)

var _ = Describe("Caching Functionality", func() {
Expand Down
6 changes: 3 additions & 3 deletions src/semantic-router/pkg/extproc/edge_cases_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ import (

ext_proc "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"

"github.com/vllm-project/semantic-router/semantic-router/pkg/cache"
"github.com/vllm-project/semantic-router/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/semantic-router/pkg/extproc"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/cache"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/extproc"
)

var _ = Describe("Edge Cases and Error Conditions", func() {
Expand Down
4 changes: 2 additions & 2 deletions src/semantic-router/pkg/extproc/endpoint_selection_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ import (

core "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
ext_proc "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
"github.com/vllm-project/semantic-router/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/semantic-router/pkg/extproc"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/extproc"
)

var _ = Describe("Endpoint Selection", func() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import (
"strings"
"testing"

"github.com/vllm-project/semantic-router/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
)

// TestReasoningModeConfiguration demonstrates how the reasoning mode works with the new config-based approach
Expand Down
4 changes: 2 additions & 2 deletions src/semantic-router/pkg/extproc/reason_mode_selector.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ import (
"log"
"strings"

"github.com/vllm-project/semantic-router/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/semantic-router/pkg/metrics"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/metrics"
)

// shouldUseReasoningMode determines if reasoning mode should be enabled based on the query category
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import (
"encoding/json"
"testing"

"github.com/vllm-project/semantic-router/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
)

// TestModelReasoningFamily tests the new family-based configuration approach
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import (
"encoding/json"
"testing"

"github.com/vllm-project/semantic-router/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
)

// TestReasoningModeIntegration tests the complete reasoning mode integration
Expand Down
10 changes: 5 additions & 5 deletions src/semantic-router/pkg/extproc/request_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ import (
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"

"github.com/vllm-project/semantic-router/semantic-router/pkg/cache"
"github.com/vllm-project/semantic-router/semantic-router/pkg/metrics"
"github.com/vllm-project/semantic-router/semantic-router/pkg/observability"
"github.com/vllm-project/semantic-router/semantic-router/pkg/utils/http"
"github.com/vllm-project/semantic-router/semantic-router/pkg/utils/pii"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/cache"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/metrics"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/utils/http"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/utils/pii"
)

// parseOpenAIRequest parses the raw JSON using the OpenAI SDK types
Expand Down
8 changes: 4 additions & 4 deletions src/semantic-router/pkg/extproc/request_processing_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ import (
core "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
ext_proc "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"

"github.com/vllm-project/semantic-router/semantic-router/pkg/cache"
"github.com/vllm-project/semantic-router/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/semantic-router/pkg/extproc"
"github.com/vllm-project/semantic-router/semantic-router/pkg/tools"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/cache"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/extproc"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/tools"
)

var _ = Describe("Request Processing", func() {
Expand Down
4 changes: 2 additions & 2 deletions src/semantic-router/pkg/extproc/response_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ import (
ext_proc "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"

"github.com/openai/openai-go"
"github.com/vllm-project/semantic-router/semantic-router/pkg/metrics"
"github.com/vllm-project/semantic-router/semantic-router/pkg/observability"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/metrics"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability"
)

// handleResponseHeaders processes the response headers
Expand Down
12 changes: 6 additions & 6 deletions src/semantic-router/pkg/extproc/router.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ import (
ext_proc "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"

candle_binding "github.com/vllm-project/semantic-router/candle-binding"
"github.com/vllm-project/semantic-router/semantic-router/pkg/cache"
"github.com/vllm-project/semantic-router/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/semantic-router/pkg/services"
"github.com/vllm-project/semantic-router/semantic-router/pkg/tools"
"github.com/vllm-project/semantic-router/semantic-router/pkg/utils/classification"
"github.com/vllm-project/semantic-router/semantic-router/pkg/utils/pii"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/cache"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/services"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/tools"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/utils/classification"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/utils/pii"
)

var (
Expand Down
10 changes: 5 additions & 5 deletions src/semantic-router/pkg/extproc/security_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@ import (

ext_proc "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"

"github.com/vllm-project/semantic-router/semantic-router/pkg/cache"
"github.com/vllm-project/semantic-router/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/semantic-router/pkg/extproc"
"github.com/vllm-project/semantic-router/semantic-router/pkg/utils/classification"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/cache"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/extproc"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/utils/classification"

"github.com/vllm-project/semantic-router/semantic-router/pkg/utils/pii"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/utils/pii"
)

const (
Expand Down
2 changes: 1 addition & 1 deletion src/semantic-router/pkg/extproc/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import (

ext_proc "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
"github.com/fsnotify/fsnotify"
"github.com/vllm-project/semantic-router/semantic-router/pkg/observability"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability"
"google.golang.org/grpc"
)

Expand Down
4 changes: 2 additions & 2 deletions src/semantic-router/pkg/extproc/stream_handling_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ import (
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"

"github.com/vllm-project/semantic-router/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/semantic-router/pkg/extproc"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/extproc"
)

var _ = Describe("Process Stream Handling", func() {
Expand Down
12 changes: 6 additions & 6 deletions src/semantic-router/pkg/extproc/test_utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ import (
"google.golang.org/grpc/metadata"

candle_binding "github.com/vllm-project/semantic-router/candle-binding"
"github.com/vllm-project/semantic-router/semantic-router/pkg/cache"
"github.com/vllm-project/semantic-router/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/semantic-router/pkg/extproc"
"github.com/vllm-project/semantic-router/semantic-router/pkg/tools"
"github.com/vllm-project/semantic-router/semantic-router/pkg/utils/classification"
"github.com/vllm-project/semantic-router/semantic-router/pkg/utils/pii"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/cache"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/extproc"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/tools"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/utils/classification"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/utils/pii"
)

// MockStream implements the ext_proc.ExternalProcessor_ProcessServer interface for testing
Expand Down
2 changes: 1 addition & 1 deletion src/semantic-router/pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"

"github.com/vllm-project/semantic-router/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
)

// Minimal fallback bucket configurations - used only when configuration is completely missing
Expand Down
2 changes: 1 addition & 1 deletion src/semantic-router/pkg/metrics/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import (
"testing"
"time"

"github.com/vllm-project/semantic-router/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
)

// TestMain ensures metrics are initialized before running tests
Expand Down
4 changes: 2 additions & 2 deletions src/semantic-router/pkg/services/classification.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ import (
"fmt"
"time"

"github.com/vllm-project/semantic-router/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/semantic-router/pkg/utils/classification"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/utils/classification"
)

// Global classification service instance
Expand Down
4 changes: 2 additions & 2 deletions src/semantic-router/pkg/utils/classification/classifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ import (
"time"

candle_binding "github.com/vllm-project/semantic-router/candle-binding"
"github.com/vllm-project/semantic-router/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/semantic-router/pkg/metrics"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/metrics"
)

type CategoryInference interface {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import (
. "github.com/onsi/gomega"

candle_binding "github.com/vllm-project/semantic-router/candle-binding"
"github.com/vllm-project/semantic-router/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
)

func TestClassifier(t *testing.T) {
Expand Down
2 changes: 1 addition & 1 deletion src/semantic-router/pkg/utils/http/response.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
core "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
ext_proc "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
typev3 "github.com/envoyproxy/go-control-plane/envoy/type/v3"
"github.com/vllm-project/semantic-router/semantic-router/pkg/metrics"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/metrics"
)

// CreatePIIViolationResponse creates an HTTP response for PII policy violations
Expand Down
2 changes: 1 addition & 1 deletion src/semantic-router/pkg/utils/pii/policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import (
"log"
"slices"

"github.com/vllm-project/semantic-router/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
)

// PolicyChecker handles PII policy validation
Expand Down
3 changes: 2 additions & 1 deletion website/docs/intro.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ sidebar_position: 1

[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/vllm-project/semantic-router/blob/main/LICENSE)
[![Hugging Face](https://img.shields.io/badge/🤗%20Hugging%20Face-Community-yellow)](https://huggingface.co/LLM-Semantic-Router)
[![Go Report Card](https://goreportcard.com/badge/github.com/vllm-project/semantic-router)](https://goreportcard.com/report/github.com/vllm-project/semantic-router)
[![Go Report Card](https://goreportcard.com/badge/github.com/vllm-project/semantic-router/src/semantic-router)](https://goreportcard.com/report/github.com/vllm-project/semantic-router/src/semantic-router)
![Test And Build](https://github.com/vllm-project/semantic-router/workflows/Test%20And%20Build/badge.svg)

An intelligent **Mixture-of-Models (MoM)** router that acts as an Envoy External Processor (ExtProc) to direct OpenAI API requests to the most suitable backend model from a defined pool. Using BERT-based semantic understanding and classification, it optimizes both performance and cost efficiency.

Expand Down
Loading