From e8a914f7c9744958cf4957e3088749bef8b162b6 Mon Sep 17 00:00:00 2001 From: bitliu Date: Tue, 16 Sep 2025 18:51:57 +0800 Subject: [PATCH] api: add semantic route support Signed-off-by: bitliu --- .../crds/vllm.ai_semanticroutes.yaml | 293 +++++++++++ examples/semanticroute/README.md | 179 +++++++ .../semanticroute/complex-filter-chain.yaml | 59 +++ .../semanticroute/comprehensive-example.yaml | 109 ++++ examples/semanticroute/multiple-routes.yaml | 72 +++ .../semanticroute/simple-intent-routing.yaml | 26 + .../semanticroute/tool-selection-example.yaml | 50 ++ examples/semanticroute/weighted-routing.yaml | 49 ++ src/semantic-router/go.mod | 15 +- src/semantic-router/go.sum | 41 +- src/semantic-router/hack/boilerplate.go.txt | 15 + .../pkg/apis/vllm.ai/v1alpha1/doc.go | 4 + .../apis/vllm.ai/v1alpha1/filter_helpers.go | 253 ++++++++++ .../pkg/apis/vllm.ai/v1alpha1/filter_types.go | 220 ++++++++ .../pkg/apis/vllm.ai/v1alpha1/register.go | 47 ++ .../pkg/apis/vllm.ai/v1alpha1/types.go | 179 +++++++ .../vllm.ai/v1alpha1/zz_generated.deepcopy.go | 477 ++++++++++++++++++ tools/make/golang.mk | 16 + 18 files changed, 2099 insertions(+), 5 deletions(-) create mode 100644 deploy/kubernetes/crds/vllm.ai_semanticroutes.yaml create mode 100644 examples/semanticroute/README.md create mode 100644 examples/semanticroute/complex-filter-chain.yaml create mode 100644 examples/semanticroute/comprehensive-example.yaml create mode 100644 examples/semanticroute/multiple-routes.yaml create mode 100644 examples/semanticroute/simple-intent-routing.yaml create mode 100644 examples/semanticroute/tool-selection-example.yaml create mode 100644 examples/semanticroute/weighted-routing.yaml create mode 100644 src/semantic-router/hack/boilerplate.go.txt create mode 100644 src/semantic-router/pkg/apis/vllm.ai/v1alpha1/doc.go create mode 100644 src/semantic-router/pkg/apis/vllm.ai/v1alpha1/filter_helpers.go create mode 100644 src/semantic-router/pkg/apis/vllm.ai/v1alpha1/filter_types.go create mode 
100644 src/semantic-router/pkg/apis/vllm.ai/v1alpha1/register.go create mode 100644 src/semantic-router/pkg/apis/vllm.ai/v1alpha1/types.go create mode 100644 src/semantic-router/pkg/apis/vllm.ai/v1alpha1/zz_generated.deepcopy.go diff --git a/deploy/kubernetes/crds/vllm.ai_semanticroutes.yaml b/deploy/kubernetes/crds/vllm.ai_semanticroutes.yaml new file mode 100644 index 00000000..c943e699 --- /dev/null +++ b/deploy/kubernetes/crds/vllm.ai_semanticroutes.yaml @@ -0,0 +1,293 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.19.0 + name: semanticroutes.vllm.ai +spec: + group: vllm.ai + names: + kind: SemanticRoute + listKind: SemanticRouteList + plural: semanticroutes + shortNames: + - sr + singular: semanticroute + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: Number of routing rules + jsonPath: .spec.rules + name: Rules + type: integer + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: SemanticRoute defines a semantic routing rule for LLM requests + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: SemanticRouteSpec defines the desired state of SemanticRoute + properties: + rules: + description: Rules defines the routing rules to be applied + items: + description: RouteRule defines a single routing rule + properties: + defaultModel: + description: DefaultModel defines the fallback model if no modelRefs + are available + properties: + address: + description: Address defines the endpoint address + maxLength: 255 + minLength: 1 + type: string + modelName: + description: ModelName defines the name of the model + maxLength: 100 + minLength: 1 + type: string + port: + description: Port defines the endpoint port + format: int32 + maximum: 65535 + minimum: 1 + type: integer + priority: + description: Priority defines the priority of this model + reference (higher values = higher priority) + format: int32 + maximum: 1000 + minimum: 0 + type: integer + weight: + default: 100 + description: Weight defines the traffic weight for this + model (0-100) + format: int32 + maximum: 100 + minimum: 0 + type: integer + required: + - address + - modelName + - port + type: object + filters: + description: Filters defines the optional filters to be applied + to requests matching this rule + items: + description: Filter defines a filter to be applied to requests + properties: + config: + description: Config defines the filter-specific configuration + type: object + x-kubernetes-preserve-unknown-fields: true + enabled: + default: true + description: Enabled defines whether this filter is enabled + type: boolean + type: + allOf: + - enum: + - PIIDetection + - PromptGuard + - SemanticCache + - ReasoningControl + - ToolSelection + - enum: + - PIIDetection + - PromptGuard + - SemanticCache + - ReasoningControl + description: Type defines the filter type + type: string + required: + - type + type: object + maxItems: 20 
+ type: array + intents: + description: Intents defines the intent categories that this + rule should match + items: + description: Intent defines an intent category for routing + properties: + category: + description: Category defines the intent category name + (e.g., "math", "computer science", "creative") + maxLength: 100 + minLength: 1 + pattern: ^[a-zA-Z0-9\s\-_]+$ + type: string + description: + description: Description provides an optional description + of this intent category + maxLength: 500 + type: string + threshold: + default: 0.7 + description: Threshold defines the confidence threshold + for this intent (0.0-1.0) + maximum: 1 + minimum: 0 + type: number + required: + - category + type: object + maxItems: 50 + minItems: 1 + type: array + modelRefs: + description: ModelRefs defines the target models for this routing + rule + items: + description: ModelRef defines a reference to a model endpoint + properties: + address: + description: Address defines the endpoint address + maxLength: 255 + minLength: 1 + type: string + modelName: + description: ModelName defines the name of the model + maxLength: 100 + minLength: 1 + type: string + port: + description: Port defines the endpoint port + format: int32 + maximum: 65535 + minimum: 1 + type: integer + priority: + description: Priority defines the priority of this model + reference (higher values = higher priority) + format: int32 + maximum: 1000 + minimum: 0 + type: integer + weight: + default: 100 + description: Weight defines the traffic weight for this + model (0-100) + format: int32 + maximum: 100 + minimum: 0 + type: integer + required: + - address + - modelName + - port + type: object + maxItems: 10 + minItems: 1 + type: array + required: + - intents + - modelRefs + type: object + maxItems: 100 + minItems: 1 + type: array + required: + - rules + type: object + status: + description: SemanticRouteStatus defines the observed state of SemanticRoute + properties: + activeRules: + description: ActiveRules 
indicates the number of currently active + routing rules + format: int32 + type: integer + conditions: + description: Conditions represent the latest available observations + of the SemanticRoute's current state + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. 
+ maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + observedGeneration: + description: ObservedGeneration reflects the generation of the most + recently observed SemanticRoute + format: int64 + type: integer + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/examples/semanticroute/README.md b/examples/semanticroute/README.md new file mode 100644 index 00000000..d232b3c1 --- /dev/null +++ b/examples/semanticroute/README.md @@ -0,0 +1,179 @@ +# SemanticRoute Examples + +This directory contains various examples of SemanticRoute configurations demonstrating different routing scenarios and capabilities. + +## Examples Overview + +### 1. Simple Intent Routing (`simple-intent-routing.yaml`) + +A basic example showing intent-based routing for math and computer science queries. + +**Features:** + +- Simple intent matching with categories +- Single model reference with fallback +- Minimal configuration + +**Use Case:** Basic routing based on query categories without complex filtering. + +### 2. Complex Filter Chain (`complex-filter-chain.yaml`) + +Demonstrates a comprehensive filter chain with multiple security and performance filters. + +**Features:** + +- PII detection with custom allowed types +- Prompt guard with custom security rules +- Semantic caching for performance +- Reasoning control configuration + +**Use Case:** Production environments requiring security, privacy, and performance optimizations. + +### 3. Multiple Routes (`multiple-routes.yaml`) + +Shows how to define multiple routing rules within a single SemanticRoute resource. + +**Features:** + +- Separate rules for technical vs. 
creative queries +- Different reasoning configurations per rule +- Rule-specific caching strategies + +**Use Case:** Applications serving diverse query types with different processing requirements. + +### 4. Weighted Routing (`weighted-routing.yaml`) + +Demonstrates traffic distribution across multiple model endpoints using weights and priorities. + +**Features:** + +- Traffic splitting (80/20) between models +- Priority-based failover +- Load balancing configuration + +**Use Case:** A/B testing, gradual rollouts, or load distribution across model endpoints. + +### 5. Tool Selection Example (`tool-selection-example.yaml`) + +Demonstrates automatic tool selection based on semantic similarity to user queries. + +**Features:** + +- Automatic tool selection with configurable similarity threshold +- Tool filtering by categories and tags +- Fallback behavior configuration +- Integration with semantic caching and reasoning control + +**Use Case:** Applications requiring dynamic tool selection based on user intent and query content. + +### 6. Comprehensive Example (`comprehensive-example.yaml`) + +A production-ready configuration showcasing all SemanticRoute features. + +**Features:** + +- Multiple rules with different configurations +- Advanced filtering with custom rules +- External cache backend (Redis) +- High-availability model setup +- Comprehensive security policies + +**Use Case:** Enterprise production deployments requiring full feature utilization. + +## Deployment Instructions + +### Prerequisites + +1. Kubernetes cluster with SemanticRoute CRD installed: + + ```bash + kubectl apply -f ../../deploy/kubernetes/crds/vllm.ai_semanticroutes.yaml + ``` + +2. Ensure your model endpoints are accessible from the cluster. + +### Deploy Examples + +1. **Deploy a single example:** + + ```bash + kubectl apply -f simple-intent-routing.yaml + ``` + +2. **Deploy all examples:** + + ```bash + kubectl apply -f . + ``` + +3. 
**Verify deployment:** + + ```bash + kubectl get semanticroutes + kubectl describe semanticroute reasoning-route + ``` + +## Configuration Reference + +### Intent Configuration + +```yaml +intents: +- category: "math" # Required: Intent category name + description: "Mathematics queries" # Optional: Human-readable description + threshold: 0.7 # Optional: Confidence threshold (0.0-1.0), defaults to 0.7 +``` + +### Model Reference Configuration + +```yaml +modelRefs: +- modelName: "gpt-oss" # Required: Model identifier + address: "127.0.0.1" # Required: Endpoint address + port: 8080 # Required: Endpoint port (1-65535) + weight: 80 # Optional: Traffic weight (0-100), defaults to 100 + priority: 100 # Optional: Priority for failover (0-1000, higher value = higher priority) +``` + +### Filter Configuration + +Each filter type has specific configuration options: + +- **PIIDetection**: Controls PII detection and handling +- **PromptGuard**: Provides security and jailbreak protection +- **SemanticCache**: Enables response caching for performance +- **ReasoningControl**: Manages reasoning mode behavior +- **ToolSelection**: Enables automatic tool selection based on semantic similarity + +## Best Practices + +1. **Start Simple**: Begin with basic intent routing and add filters as needed. + +2. **Test Thoroughly**: Validate routing behavior with representative queries. + +3. **Monitor Performance**: Use appropriate cache settings and monitor hit rates. + +4. **Security First**: Enable PII detection and prompt guard in production. + +5. **Gradual Rollout**: Use weighted routing for safe model deployments. + +## Troubleshooting + +### Common Issues + +1. **Route Not Matching**: Check intent categories and thresholds. +2. **Model Unreachable**: Verify endpoint addresses and network connectivity. +3. **Filter Errors**: Validate filter configurations against the schema.
+ +### Debugging Commands + +```bash +# Check SemanticRoute status +kubectl get sr -o wide + +# View detailed configuration of every SemanticRoute in the current namespace +kubectl describe semanticroute + +# Check logs (if controller is deployed) +kubectl logs -l app=semantic-router-controller +``` diff --git a/examples/semanticroute/complex-filter-chain.yaml b/examples/semanticroute/complex-filter-chain.yaml new file mode 100644 index 00000000..6eabacfc --- /dev/null +++ b/examples/semanticroute/complex-filter-chain.yaml @@ -0,0 +1,59 @@ +apiVersion: vllm.ai/v1alpha1 +kind: SemanticRoute +metadata: + name: complex-route + namespace: default + labels: + app: semantic-router + scenario: complex-filter-chain +spec: + rules: + - intents: + - category: "computer science" + description: "Programming, algorithms, data structures" + threshold: 0.7 + - category: "math" + description: "Mathematics, calculus, algebra" + threshold: 0.7 + modelRefs: + - modelName: gpt-oss + address: 127.0.0.1 + port: 8080 + weight: 100 + filters: + - type: PIIDetection + enabled: true + config: + allowByDefault: false + pii_types_allowed: ["EMAIL_ADDRESS", "PERSON"] + threshold: 0.7 + action: "block" + - type: PromptGuard + enabled: true + config: + threshold: 0.7 + action: "block" + customRules: + - name: "sensitive-data-rule" + pattern: "(?i)(password|secret|token|key)" + action: "block" + description: "Block requests containing sensitive data keywords" + - type: SemanticCache + enabled: true + config: + similarityThreshold: 0.8 + maxEntries: 1000 + ttlSeconds: 3600 + backend: "memory" + - type: ReasoningControl + enabled: true + config: + reasonFamily: "gpt-oss" + enableReasoning: true + reasoningEffort: "medium" + maxReasoningSteps: 10 + reasoningTimeout: 30 + defaultModel: + modelName: deepseek-v31 + address: 127.0.0.1 + port: 8088 diff --git a/examples/semanticroute/comprehensive-example.yaml b/examples/semanticroute/comprehensive-example.yaml new file mode 100644 index 00000000..fd5db4d9 --- /dev/null +++
b/examples/semanticroute/comprehensive-example.yaml @@ -0,0 +1,109 @@ +apiVersion: vllm.ai/v1alpha1 +kind: SemanticRoute +metadata: + name: comprehensive-example + namespace: default + labels: + app: semantic-router + scenario: comprehensive + environment: production +spec: + rules: + # Rule 1: High-performance reasoning route for technical queries + - intents: + - category: "computer science" + description: "Programming, algorithms, software engineering" + threshold: 0.75 + - category: "math" + description: "Advanced mathematics, calculus, statistics" + threshold: 0.75 + modelRefs: + - modelName: gpt-oss-premium + address: 127.0.0.1 + port: 8080 + weight: 70 + priority: 100 + - modelName: claude-reasoning + address: 127.0.0.1 + port: 8082 + weight: 30 + priority: 95 + filters: + - type: PIIDetection + enabled: true + config: + allowByDefault: false + pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE"] + threshold: 0.8 + action: "block" + - type: PromptGuard + enabled: true + config: + threshold: 0.75 + action: "block" + customRules: + - name: "code-injection-rule" + pattern: "(?i)(eval|exec|system|shell|cmd)" + action: "warn" + description: "Detect potential code injection attempts" + - type: SemanticCache + enabled: true + config: + similarityThreshold: 0.85 + maxEntries: 2000 + ttlSeconds: 7200 + backend: "redis" + backendConfig: + host: "redis.cache.svc.cluster.local" + port: "6379" + - type: ReasoningControl + enabled: true + config: + reasonFamily: "gpt-oss" + enableReasoning: true + reasoningEffort: "high" + maxReasoningSteps: 20 + reasoningTimeout: 60 + defaultModel: + modelName: deepseek-v31 + address: 127.0.0.1 + port: 8088 + + # Rule 2: Creative and general purpose route + - intents: + - category: "creative" + description: "Creative writing, storytelling, art generation" + threshold: 0.6 + - category: "other" + description: "General purpose conversations" + threshold: 0.5 + modelRefs: + - modelName: creative-model + address: 127.0.0.1 + port: 8081 + 
weight: 100 + filters: + - type: PIIDetection + enabled: true + config: + allowByDefault: true + pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"] + threshold: 0.7 + action: "mask" + - type: ReasoningControl + enabled: true + config: + reasonFamily: "gpt-oss" + enableReasoning: false + reasoningEffort: "low" + - type: SemanticCache + enabled: true + config: + similarityThreshold: 0.75 + maxEntries: 1000 + ttlSeconds: 3600 + backend: "memory" + defaultModel: + modelName: general-model + address: 127.0.0.1 + port: 8089 diff --git a/examples/semanticroute/multiple-routes.yaml b/examples/semanticroute/multiple-routes.yaml new file mode 100644 index 00000000..011c8901 --- /dev/null +++ b/examples/semanticroute/multiple-routes.yaml @@ -0,0 +1,72 @@ +apiVersion: vllm.ai/v1alpha1 +kind: SemanticRoute +metadata: + name: multiple-routes + namespace: default + labels: + app: semantic-router + scenario: multiple-routes +spec: + rules: + # Rule 1: Reasoning-enabled route for technical queries + - intents: + - category: "computer science" + description: "Programming, algorithms, data structures" + threshold: 0.7 + - category: "math" + description: "Mathematics, calculus, algebra" + threshold: 0.7 + modelRefs: + - modelName: gpt-oss + address: 127.0.0.1 + port: 8080 + weight: 100 + filters: + - type: ReasoningControl + enabled: true + config: + reasonFamily: "gpt-oss" + enableReasoning: true + reasoningEffort: "high" + maxReasoningSteps: 15 + - type: SemanticCache + enabled: true + config: + similarityThreshold: 0.85 + maxEntries: 500 + ttlSeconds: 7200 + defaultModel: + modelName: deepseek-v31 + address: 127.0.0.1 + port: 8088 + + # Rule 2: Lightweight route for creative and general queries + - intents: + - category: "creative" + description: "Creative writing, storytelling, art" + threshold: 0.6 + - category: "other" + description: "General purpose queries" + threshold: 0.5 + modelRefs: + - modelName: lightweight-model + address: 127.0.0.1 + port: 8081 + 
weight: 100 + filters: + - type: ReasoningControl + enabled: true + config: + reasonFamily: "gpt-oss" + enableReasoning: false + reasoningEffort: "low" + - type: PIIDetection + enabled: true + config: + allowByDefault: true + threshold: 0.8 + action: "mask" + defaultModel: + modelName: general-model + address: 127.0.0.1 + port: 8089 diff --git a/examples/semanticroute/simple-intent-routing.yaml b/examples/semanticroute/simple-intent-routing.yaml new file mode 100644 index 00000000..99abd5b0 --- /dev/null +++ b/examples/semanticroute/simple-intent-routing.yaml @@ -0,0 +1,26 @@ +apiVersion: vllm.ai/v1alpha1 +kind: SemanticRoute +metadata: + name: reasoning-route + namespace: default + labels: + app: semantic-router + scenario: simple-intent +spec: + rules: + - intents: + - category: "computer science" + description: "Programming, algorithms, data structures, software engineering" + threshold: 0.7 + - category: "math" + description: "Mathematics, calculus, algebra, statistics" + threshold: 0.7 + modelRefs: + - modelName: gpt-oss + address: 127.0.0.1 + port: 8080 + weight: 100 + defaultModel: + modelName: deepseek-v31 + address: 127.0.0.1 + port: 8088 diff --git a/examples/semanticroute/tool-selection-example.yaml b/examples/semanticroute/tool-selection-example.yaml new file mode 100644 index 00000000..7cd4c219 --- /dev/null +++ b/examples/semanticroute/tool-selection-example.yaml @@ -0,0 +1,50 @@ +apiVersion: vllm.ai/v1alpha1 +kind: SemanticRoute +metadata: + name: tool-selection-example + namespace: default + labels: + app: semantic-router + scenario: tool-selection +spec: + rules: + - intents: + - category: "computer science" + description: "Programming, algorithms, data structures" + threshold: 0.7 + - category: "math" + description: "Mathematics, calculus, algebra" + threshold: 0.7 + modelRefs: + - modelName: gpt-oss + address: 127.0.0.1 + port: 8080 + weight: 100 + filters: + - type: ToolSelection + enabled: true + config: + topK: 3 + similarityThreshold: 0.8 + 
toolsDBPath: "config/tools_db.json" + fallbackToEmpty: true + categories: ["weather", "calculation", "search"] + tags: ["utility", "api", "function"] + - type: SemanticCache + enabled: true + config: + similarityThreshold: 0.85 + maxEntries: 1000 + ttlSeconds: 3600 + backend: "memory" + - type: ReasoningControl + enabled: true + config: + reasonFamily: "gpt-oss" + enableReasoning: true + reasoningEffort: "medium" + maxReasoningSteps: 10 + defaultModel: + modelName: deepseek-v31 + address: 127.0.0.1 + port: 8088 diff --git a/examples/semanticroute/weighted-routing.yaml b/examples/semanticroute/weighted-routing.yaml new file mode 100644 index 00000000..19f381cb --- /dev/null +++ b/examples/semanticroute/weighted-routing.yaml @@ -0,0 +1,49 @@ +apiVersion: vllm.ai/v1alpha1 +kind: SemanticRoute +metadata: + name: weighted-routing + namespace: default + labels: + app: semantic-router + scenario: weighted-routing +spec: + rules: + - intents: + - category: "computer science" + description: "Programming, algorithms, data structures" + threshold: 0.7 + - category: "math" + description: "Mathematics, calculus, algebra" + threshold: 0.7 + modelRefs: + # Primary model gets 80% of traffic + - modelName: gpt-oss + address: 127.0.0.1 + port: 8080 + weight: 80 + priority: 100 + # Secondary model gets 20% of traffic + - modelName: qwen3 + address: 127.0.0.1 + port: 8089 + weight: 20 + priority: 90 + filters: + - type: ReasoningControl + enabled: true + config: + reasonFamily: "gpt-oss" + enableReasoning: true + reasoningEffort: "medium" + maxReasoningSteps: 10 + - type: SemanticCache + enabled: true + config: + similarityThreshold: 0.8 + maxEntries: 1000 + ttlSeconds: 3600 + backend: "memory" + defaultModel: + modelName: deepseek-v31 + address: 127.0.0.1 + port: 8088 diff --git a/src/semantic-router/go.mod b/src/semantic-router/go.mod index c467d425..e3406d7b 100644 --- a/src/semantic-router/go.mod +++ b/src/semantic-router/go.mod @@ -24,6 +24,7 @@ require ( go.uber.org/zap v1.27.0 
google.golang.org/grpc v1.71.1 gopkg.in/yaml.v3 v3.0.1 + k8s.io/apimachinery v0.31.4 ) require ( @@ -34,17 +35,22 @@ require ( github.com/cockroachdb/logtags v0.0.0-20211118104740-dabe8e521a4f // indirect github.com/cockroachdb/redact v1.1.3 // indirect github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect + github.com/fxamacker/cbor/v2 v2.7.0 // indirect github.com/getsentry/sentry-go v0.12.0 // indirect github.com/go-logr/logr v1.4.2 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/protobuf v1.5.4 // indirect github.com/google/go-cmp v0.7.0 // indirect + github.com/google/gofuzz v1.2.0 // indirect github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect github.com/kr/pretty v0.3.1 // indirect github.com/kr/text v0.2.0 // indirect github.com/milvus-io/milvus-proto/go-api/v2 v2.4.10-0.20240819025435-512e3b98866a // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect @@ -55,8 +61,9 @@ require ( github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.1 // indirect github.com/tidwall/sjson v1.2.5 // indirect + github.com/x448/float16 v0.8.4 // indirect go.uber.org/automaxprocs v1.6.0 // indirect - go.uber.org/multierr v1.10.0 // indirect + go.uber.org/multierr v1.11.0 // indirect golang.org/x/net v0.41.0 // indirect golang.org/x/sync v0.15.0 // indirect golang.org/x/sys v0.33.0 // indirect @@ -64,4 +71,10 @@ require ( golang.org/x/tools v0.33.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20250115164207-1a7da9e5054f // indirect 
google.golang.org/protobuf v1.36.6 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect + k8s.io/klog/v2 v2.130.1 // indirect + k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect + sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect ) diff --git a/src/semantic-router/go.sum b/src/semantic-router/go.sum index acb526d9..42ee628e 100644 --- a/src/semantic-router/go.sum +++ b/src/semantic-router/go.sum @@ -32,8 +32,9 @@ github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3Ee github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dgraph-io/badger v1.6.0/go.mod h1:zwt7syl517jmP8s94KqSxTlM6IMsdhYy6psNgSztDR4= github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= @@ -53,6 +54,8 @@ github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= +github.com/fxamacker/cbor/v2 v2.7.0 
h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= +github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= github.com/gavv/httpexpect v2.0.0+incompatible/go.mod h1:x+9tiU1YnrOvnB725RkpoLv1M62hOWzwo5OXotisrKc= github.com/getsentry/sentry-go v0.12.0 h1:era7g0re5iY13bHSdN/xMkyV+5zZppjRVQhZrXCaEIk= github.com/getsentry/sentry-go v0.12.0/go.mod h1:NSap0JBYWzHND8oMbyi0+XZhUalc1TBdRL1M71JZW2c= @@ -107,10 +110,13 @@ github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMyw github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8= github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= @@ -133,6 +139,8 @@ github.com/iris-contrib/pongo2 v0.0.1/go.mod h1:Ssh+00+3GAZqSQb30AvBRNxBx7rf0Gqw github.com/iris-contrib/schema v0.0.1/go.mod h1:urYA3uvUNG1TIIjOSCzHr9/LmbQo8LrOcOqfqxa4hXw= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= 
github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= github.com/k0kubun/colorstring v0.0.0-20150214042306-9440f1994b88/go.mod h1:3w7q1U84EfirKl04SVQ/s7nPm1ZPhiXd34z40TNz36k= github.com/kataras/golog v0.0.10/go.mod h1:yJ8YKCmyL+nWjERB90Qwn+bdyBZsaQwU3bTVFgkFIp8= @@ -180,9 +188,12 @@ github.com/milvus-io/milvus-sdk-go/v2 v2.4.2/go.mod h1:ulO1YUXKH0PGg50q27grw048G github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/moul/http2curl v1.0.0/go.mod h1:8UbvGypXm98wA/IqH45anm5Y2Z6ep6O31QGOAZ3H0fQ= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= @@ -209,8 +220,9 @@ github.com/pkg/errors v0.9.1 
h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo= github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g= github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U= github.com/prometheus/client_golang v1.23.0 h1:ust4zpdl9r4trLY/gSjlm07PuiBq2ynaXXlptpfy8Uc= @@ -240,6 +252,8 @@ github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkU github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU= github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -272,6 +286,8 @@ github.com/valyala/fasthttp v1.6.0/go.mod h1:FstJa9V+Pj9vQ7OJie2qMHdwemEDaDiSdBn github.com/valyala/fasttemplate v1.0.1/go.mod 
h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8= github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ= github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ= github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y= @@ -301,8 +317,8 @@ go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwE go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= -go.uber.org/multierr v1.10.0 h1:S0h4aNzvfcFsC3dRF1jLoaov7oRaKqRGC/pUEJ2yvPQ= -go.uber.org/multierr v1.10.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= @@ -448,11 +464,16 @@ gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/go-playground/assert.v1 v1.2.1/go.mod h1:9RXL0bg/zibRAgZUYszZSwO/z8Y/a8bDuhia5mkpMnE= gopkg.in/go-playground/validator.v8 v8.18.2/go.mod 
h1:RX2a/7Ha8BgOhfk7j780h4/u/RRjR0eouCJSH80/M2Y= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/ini.v1 v1.51.1/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20191120175047-4206685974f2/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= @@ -460,3 +481,15 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +k8s.io/apimachinery v0.31.4 h1:8xjE2C4CzhYVm9DGf60yohpNUh5AEBnPxCryPBECmlM= +k8s.io/apimachinery v0.31.4/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo= +k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= +k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 
h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A= +k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/src/semantic-router/hack/boilerplate.go.txt b/src/semantic-router/hack/boilerplate.go.txt new file mode 100644 index 00000000..8f48a295 --- /dev/null +++ b/src/semantic-router/hack/boilerplate.go.txt @@ -0,0 +1,15 @@ +/* +Copyright 2025 vLLM Semantic Router. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ diff --git a/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/doc.go b/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/doc.go new file mode 100644 index 00000000..fa6e3ba6 --- /dev/null +++ b/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/doc.go @@ -0,0 +1,4 @@ +// Package v1alpha1 contains API Schema definitions for the v1alpha1 API group +// +kubebuilder:object:generate=true +// +groupName=vllm.ai +package v1alpha1 diff --git a/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/filter_helpers.go b/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/filter_helpers.go new file mode 100644 index 00000000..fef12657 --- /dev/null +++ b/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/filter_helpers.go @@ -0,0 +1,253 @@ +/* +Copyright 2025 vLLM Semantic Router. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package v1alpha1 + +import ( + "encoding/json" + "fmt" + + "k8s.io/apimachinery/pkg/runtime" +) + +// FilterConfigHelper provides helper methods for working with filter configurations +type FilterConfigHelper struct{} + +// NewFilterConfigHelper creates a new FilterConfigHelper +func NewFilterConfigHelper() *FilterConfigHelper { + return &FilterConfigHelper{} +} + +// MarshalFilterConfig marshals a filter configuration to RawExtension +func (h *FilterConfigHelper) MarshalFilterConfig(config interface{}) (*runtime.RawExtension, error) { + if config == nil { + return nil, nil + } + + data, err := json.Marshal(config) + if err != nil { + return nil, fmt.Errorf("failed to marshal filter config: %w", err) + } + + return &runtime.RawExtension{Raw: data}, nil +} + +// UnmarshalPIIDetectionConfig unmarshals a PIIDetectionConfig from RawExtension +func (h *FilterConfigHelper) UnmarshalPIIDetectionConfig(raw *runtime.RawExtension) (*PIIDetectionConfig, error) { + if raw == nil || len(raw.Raw) == 0 { + return &PIIDetectionConfig{}, nil + } + + var config PIIDetectionConfig + if err := json.Unmarshal(raw.Raw, &config); err != nil { + return nil, fmt.Errorf("failed to unmarshal PIIDetectionConfig: %w", err) + } + + return &config, nil +} + +// UnmarshalPromptGuardConfig unmarshals a PromptGuardConfig from RawExtension +func (h *FilterConfigHelper) UnmarshalPromptGuardConfig(raw *runtime.RawExtension) (*PromptGuardConfig, error) { + if raw == nil || len(raw.Raw) == 0 { + return &PromptGuardConfig{}, nil + } + + var config PromptGuardConfig + if err := json.Unmarshal(raw.Raw, &config); err != nil { + return nil, fmt.Errorf("failed to unmarshal PromptGuardConfig: %w", err) + } + + return &config, nil +} + +// UnmarshalSemanticCacheConfig unmarshals a SemanticCacheConfig from RawExtension +func (h *FilterConfigHelper) UnmarshalSemanticCacheConfig(raw *runtime.RawExtension) (*SemanticCacheConfig, error) { + if raw == nil || len(raw.Raw) == 0 { + return &SemanticCacheConfig{}, 
nil + } + + var config SemanticCacheConfig + if err := json.Unmarshal(raw.Raw, &config); err != nil { + return nil, fmt.Errorf("failed to unmarshal SemanticCacheConfig: %w", err) + } + + return &config, nil +} + +// UnmarshalReasoningControlConfig unmarshals a ReasoningControlConfig from RawExtension +func (h *FilterConfigHelper) UnmarshalReasoningControlConfig(raw *runtime.RawExtension) (*ReasoningControlConfig, error) { + if raw == nil || len(raw.Raw) == 0 { + return &ReasoningControlConfig{}, nil + } + + var config ReasoningControlConfig + if err := json.Unmarshal(raw.Raw, &config); err != nil { + return nil, fmt.Errorf("failed to unmarshal ReasoningControlConfig: %w", err) + } + + return &config, nil +} + +// MarshalToolSelectionConfig marshals a ToolSelectionConfig to RawExtension +func (h *FilterConfigHelper) MarshalToolSelectionConfig(config *ToolSelectionConfig) (*runtime.RawExtension, error) { + if config == nil { + return &runtime.RawExtension{}, nil + } + + data, err := json.Marshal(config) + if err != nil { + return nil, fmt.Errorf("failed to marshal ToolSelectionConfig: %w", err) + } + + return &runtime.RawExtension{Raw: data}, nil +} + +// UnmarshalToolSelectionConfig unmarshals a ToolSelectionConfig from RawExtension +func (h *FilterConfigHelper) UnmarshalToolSelectionConfig(raw *runtime.RawExtension) (*ToolSelectionConfig, error) { + if raw == nil || len(raw.Raw) == 0 { + return &ToolSelectionConfig{}, nil + } + + var config ToolSelectionConfig + if err := json.Unmarshal(raw.Raw, &config); err != nil { + return nil, fmt.Errorf("failed to unmarshal ToolSelectionConfig: %w", err) + } + + return &config, nil +} + +// UnmarshalFilterConfig unmarshals a filter configuration based on the filter type +func (h *FilterConfigHelper) UnmarshalFilterConfig(filterType FilterType, raw *runtime.RawExtension) (interface{}, error) { + switch filterType { + case FilterTypePIIDetection: + return h.UnmarshalPIIDetectionConfig(raw) + case FilterTypePromptGuard: + return 
h.UnmarshalPromptGuardConfig(raw) + case FilterTypeSemanticCache: + return h.UnmarshalSemanticCacheConfig(raw) + case FilterTypeReasoningControl: + return h.UnmarshalReasoningControlConfig(raw) + case FilterTypeToolSelection: + return h.UnmarshalToolSelectionConfig(raw) + default: + return nil, fmt.Errorf("unsupported filter type: %s", filterType) + } +} + +// ValidateFilterConfig validates a filter configuration +func (h *FilterConfigHelper) ValidateFilterConfig(filter *Filter) error { + if filter == nil { + return fmt.Errorf("filter cannot be nil") + } + + // Validate filter type + switch filter.Type { + case FilterTypePIIDetection, FilterTypePromptGuard, FilterTypeSemanticCache, FilterTypeReasoningControl, FilterTypeToolSelection: + // Valid filter types + default: + return fmt.Errorf("invalid filter type: %s", filter.Type) + } + + // If config is provided, try to unmarshal it to validate structure + if filter.Config != nil { + _, err := h.UnmarshalFilterConfig(filter.Type, filter.Config) + if err != nil { + return fmt.Errorf("invalid filter config for type %s: %w", filter.Type, err) + } + } + + return nil +} + +// CreatePIIDetectionFilter creates a PIIDetection filter with the given configuration +func CreatePIIDetectionFilter(config *PIIDetectionConfig) (*Filter, error) { + helper := NewFilterConfigHelper() + rawConfig, err := helper.MarshalFilterConfig(config) + if err != nil { + return nil, err + } + + enabled := true + return &Filter{ + Type: FilterTypePIIDetection, + Config: rawConfig, + Enabled: &enabled, + }, nil +} + +// CreatePromptGuardFilter creates a PromptGuard filter with the given configuration +func CreatePromptGuardFilter(config *PromptGuardConfig) (*Filter, error) { + helper := NewFilterConfigHelper() + rawConfig, err := helper.MarshalFilterConfig(config) + if err != nil { + return nil, err + } + + enabled := true + return &Filter{ + Type: FilterTypePromptGuard, + Config: rawConfig, + Enabled: &enabled, + }, nil +} + +// 
CreateSemanticCacheFilter creates a SemanticCache filter with the given configuration +func CreateSemanticCacheFilter(config *SemanticCacheConfig) (*Filter, error) { + helper := NewFilterConfigHelper() + rawConfig, err := helper.MarshalFilterConfig(config) + if err != nil { + return nil, err + } + + enabled := true + return &Filter{ + Type: FilterTypeSemanticCache, + Config: rawConfig, + Enabled: &enabled, + }, nil +} + +// CreateReasoningControlFilter creates a ReasoningControl filter with the given configuration +func CreateReasoningControlFilter(config *ReasoningControlConfig) (*Filter, error) { + helper := NewFilterConfigHelper() + rawConfig, err := helper.MarshalFilterConfig(config) + if err != nil { + return nil, err + } + + enabled := true + return &Filter{ + Type: FilterTypeReasoningControl, + Config: rawConfig, + Enabled: &enabled, + }, nil +} + +// CreateToolSelectionFilter creates a ToolSelection filter with the given configuration +func CreateToolSelectionFilter(config *ToolSelectionConfig) (*Filter, error) { + helper := NewFilterConfigHelper() + rawConfig, err := helper.MarshalFilterConfig(config) + if err != nil { + return nil, err + } + + enabled := true + return &Filter{ + Type: FilterTypeToolSelection, + Config: rawConfig, + Enabled: &enabled, + }, nil +} diff --git a/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/filter_types.go b/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/filter_types.go new file mode 100644 index 00000000..7394954a --- /dev/null +++ b/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/filter_types.go @@ -0,0 +1,220 @@ +/* +Copyright 2025 vLLM Semantic Router. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
// PIIDetectionConfig defines the configuration for the PII detection filter.
// Every field is optional and pointer- or slice-typed, so an unset field is
// omitted from the JSON encoding.
//
// NOTE(review): Filter.Config is declared as a runtime.RawExtension, so the
// kubebuilder validation/default markers on this and the other *Config structs
// in this file are presumably not part of the generated CRD schema - confirm
// where (if anywhere) they are enforced.
type PIIDetectionConfig struct {
	// AllowByDefault defines whether PII is allowed by default
	// +optional
	// +kubebuilder:default=false
	AllowByDefault *bool `json:"allowByDefault,omitempty"`

	// PIITypesAllowed defines the list of PII types that are allowed.
	// Note that its JSON key is snake_case ("pii_types_allowed"), unlike the
	// camelCase keys used by the other fields in this file.
	// +optional
	// +kubebuilder:validation:MaxItems=50
	PIITypesAllowed []string `json:"pii_types_allowed,omitempty"`

	// Threshold defines the confidence threshold for PII detection (0.0-1.0)
	// +optional
	// +kubebuilder:validation:Minimum=0
	// +kubebuilder:validation:Maximum=1
	// +kubebuilder:default=0.7
	Threshold *float64 `json:"threshold,omitempty"`

	// Action defines what to do when PII is detected
	// +optional
	// +kubebuilder:validation:Enum=block;mask;allow
	// +kubebuilder:default=block
	Action *string `json:"action,omitempty"`
}

// PromptGuardConfig defines the configuration for the prompt guard filter.
type PromptGuardConfig struct {
	// Threshold defines the confidence threshold for jailbreak detection (0.0-1.0)
	// +optional
	// +kubebuilder:validation:Minimum=0
	// +kubebuilder:validation:Maximum=1
	// +kubebuilder:default=0.7
	Threshold *float64 `json:"threshold,omitempty"`

	// Action defines what to do when a jailbreak attempt is detected
	// +optional
	// +kubebuilder:validation:Enum=block;warn;allow
	// +kubebuilder:default=block
	Action *string `json:"action,omitempty"`

	// CustomRules defines additional custom security rules
	// +optional
	// +kubebuilder:validation:MaxItems=100
	CustomRules []SecurityRule `json:"customRules,omitempty"`
}

// SecurityRule defines a custom security rule: a named regex pattern paired
// with the action to take when it matches. Name, Pattern and Action are
// required; Description is optional.
type SecurityRule struct {
	// Name defines the name of the security rule
	// +kubebuilder:validation:Required
	// +kubebuilder:validation:MinLength=1
	// +kubebuilder:validation:MaxLength=100
	Name string `json:"name"`

	// Pattern defines the regex pattern to match
	// +kubebuilder:validation:Required
	// +kubebuilder:validation:MinLength=1
	// +kubebuilder:validation:MaxLength=1000
	Pattern string `json:"pattern"`

	// Action defines the action to take when this rule matches
	// +kubebuilder:validation:Required
	// +kubebuilder:validation:Enum=block;warn;allow
	Action string `json:"action"`

	// Description provides an optional description of this rule
	// +optional
	// +kubebuilder:validation:MaxLength=500
	Description string `json:"description,omitempty"`
}

// SemanticCacheConfig defines the configuration for the semantic cache filter.
type SemanticCacheConfig struct {
	// SimilarityThreshold defines the similarity threshold for cache hits (0.0-1.0)
	// +optional
	// +kubebuilder:validation:Minimum=0
	// +kubebuilder:validation:Maximum=1
	// +kubebuilder:default=0.8
	SimilarityThreshold *float64 `json:"similarityThreshold,omitempty"`

	// MaxEntries defines the maximum number of cache entries
	// +optional
	// +kubebuilder:validation:Minimum=1
	// +kubebuilder:validation:Maximum=1000000
	// +kubebuilder:default=1000
	MaxEntries *int32 `json:"maxEntries,omitempty"`

	// TTLSeconds defines the time-to-live for cache entries in seconds
	// (the markers bound it to 1..86400, i.e. at most one day).
	// +optional
	// +kubebuilder:validation:Minimum=1
	// +kubebuilder:validation:Maximum=86400
	// +kubebuilder:default=3600
	TTLSeconds *int32 `json:"ttlSeconds,omitempty"`

	// Backend defines the cache backend type
	// +optional
	// +kubebuilder:validation:Enum=memory;redis;milvus
	// +kubebuilder:default=memory
	Backend *string `json:"backend,omitempty"`

	// BackendConfig defines backend-specific configuration as free-form
	// string key/value pairs; no keys are defined or validated here.
	// +optional
	BackendConfig map[string]string `json:"backendConfig,omitempty"`
}

// ReasoningControlConfig defines the configuration for the reasoning control filter.
type ReasoningControlConfig struct {
	// ReasonFamily defines the reasoning family to use
	// +optional
	// +kubebuilder:validation:Enum=gpt-oss;deepseek;qwen3;claude
	ReasonFamily *string `json:"reasonFamily,omitempty"`

	// EnableReasoning defines whether reasoning mode is enabled
	// +optional
	// +kubebuilder:default=true
	EnableReasoning *bool `json:"enableReasoning,omitempty"`

	// ReasoningEffort defines the reasoning effort level
	// +optional
	// +kubebuilder:validation:Enum=low;medium;high
	// +kubebuilder:default=medium
	ReasoningEffort *string `json:"reasoningEffort,omitempty"`

	// MaxReasoningSteps defines the maximum number of reasoning steps
	// +optional
	// +kubebuilder:validation:Minimum=1
	// +kubebuilder:validation:Maximum=100
	// +kubebuilder:default=10
	MaxReasoningSteps *int32 `json:"maxReasoningSteps,omitempty"`

	// ReasoningTimeout defines the timeout for reasoning in seconds
	// +optional
	// +kubebuilder:validation:Minimum=1
	// +kubebuilder:validation:Maximum=300
	// +kubebuilder:default=30
	ReasoningTimeout *int32 `json:"reasoningTimeout,omitempty"`
}

// ToolSelectionConfig defines the configuration for the automatic tool selection filter.
type ToolSelectionConfig struct {
	// TopK defines the number of top tools to select based on similarity
	// +optional
	// +kubebuilder:validation:Minimum=1
	// +kubebuilder:validation:Maximum=20
	// +kubebuilder:default=3
	TopK *int32 `json:"topK,omitempty"`

	// SimilarityThreshold defines the similarity threshold for tool selection (0.0-1.0)
	// +optional
	// +kubebuilder:validation:Minimum=0
	// +kubebuilder:validation:Maximum=1
	// +kubebuilder:default=0.2
	SimilarityThreshold *float64 `json:"similarityThreshold,omitempty"`

	// ToolsDBPath defines the path to the tools database file
	// +optional
	// +kubebuilder:default="config/tools_db.json"
	ToolsDBPath *string `json:"toolsDBPath,omitempty"`

	// FallbackToEmpty defines whether to return empty tools on failure
	// +optional
	// +kubebuilder:default=true
	FallbackToEmpty *bool `json:"fallbackToEmpty,omitempty"`

	// Categories defines the tool categories to include in selection
	// +optional
	// +kubebuilder:validation:MaxItems=20
	Categories []string `json:"categories,omitempty"`

	// Tags defines the tool tags to include in selection
	// +optional
	// +kubebuilder:validation:MaxItems=50
	Tags []string `json:"tags,omitempty"`
}

// FilterCondition defines a condition for applying filters: a condition type
// plus an optional value.
type FilterCondition struct {
	// Type defines the condition type
	// +kubebuilder:validation:Required
	// +kubebuilder:validation:Enum=Always;Never;OnMatch;OnNoMatch
	Type FilterConditionType `json:"type"`

	// Value defines the condition value (used with OnMatch/OnNoMatch)
	// +optional
	Value string `json:"value,omitempty"`
}

// FilterConditionType defines the supported filter condition types.
// The allowed values are the FilterCondition* constants below.
// +kubebuilder:validation:Enum=Always;Never;OnMatch;OnNoMatch
type FilterConditionType string

const (
	// FilterConditionAlways means the filter is always applied
	FilterConditionAlways FilterConditionType = "Always"
	// FilterConditionNever means the filter is never applied
	FilterConditionNever FilterConditionType = "Never"
	// FilterConditionOnMatch means the filter is applied when a condition matches
	FilterConditionOnMatch FilterConditionType = "OnMatch"
	// FilterConditionOnNoMatch means the filter is applied when a condition doesn't match
	FilterConditionOnNoMatch FilterConditionType = "OnNoMatch"
)
00000000..368bfb9a --- /dev/null +++ b/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/register.go @@ -0,0 +1,47 @@ +/* +Copyright 2025 vLLM Semantic Router. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +// GroupVersion is group version used to register these objects +var GroupVersion = schema.GroupVersion{Group: "vllm.ai", Version: "v1alpha1"} + +// SchemeBuilder is used to add go types to the GroupVersionKind scheme +var ( + SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes) + AddToScheme = SchemeBuilder.AddToScheme +) + +// Resource takes an unqualified resource and returns a Group qualified GroupResource +func Resource(resource string) schema.GroupResource { + return GroupVersion.WithResource(resource).GroupResource() +} + +// addKnownTypes adds the set of types defined in this package to the supplied scheme. +func addKnownTypes(scheme *runtime.Scheme) error { + scheme.AddKnownTypes(GroupVersion, + &SemanticRoute{}, + &SemanticRouteList{}, + ) + metav1.AddToGroupVersion(scheme, GroupVersion) + return nil +} diff --git a/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/types.go b/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/types.go new file mode 100644 index 00000000..ba4ba0b5 --- /dev/null +++ b/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/types.go @@ -0,0 +1,179 @@ +/* +Copyright 2025 vLLM Semantic Router. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" +) + +// SemanticRoute defines a semantic routing rule for LLM requests +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +kubebuilder:resource:scope=Namespaced,shortName=sr +// +kubebuilder:printcolumn:name="Rules",type="integer",JSONPath=".spec.rules",description="Number of routing rules" +// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp" +type SemanticRoute struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec SemanticRouteSpec `json:"spec,omitempty"` + Status SemanticRouteStatus `json:"status,omitempty"` +} + +// SemanticRouteSpec defines the desired state of SemanticRoute +type SemanticRouteSpec struct { + // Rules defines the routing rules to be applied + // +kubebuilder:validation:MinItems=1 + // +kubebuilder:validation:MaxItems=100 + Rules []RouteRule `json:"rules"` +} + +// SemanticRouteStatus defines the observed state of SemanticRoute +type SemanticRouteStatus struct { + // Conditions represent the latest available observations of the SemanticRoute's current state + // +optional + Conditions []metav1.Condition `json:"conditions,omitempty"` + + // ObservedGeneration reflects the generation of the most recently observed SemanticRoute + // +optional + ObservedGeneration int64 
`json:"observedGeneration,omitempty"` + + // ActiveRules indicates the number of currently active routing rules + // +optional + ActiveRules int32 `json:"activeRules,omitempty"` +} + +// RouteRule defines a single routing rule +type RouteRule struct { + // Intents defines the intent categories that this rule should match + // +kubebuilder:validation:MinItems=1 + // +kubebuilder:validation:MaxItems=50 + Intents []Intent `json:"intents"` + + // ModelRefs defines the target models for this routing rule + // +kubebuilder:validation:MinItems=1 + // +kubebuilder:validation:MaxItems=10 + ModelRefs []ModelRef `json:"modelRefs"` + + // Filters defines the optional filters to be applied to requests matching this rule + // +optional + // +kubebuilder:validation:MaxItems=20 + Filters []Filter `json:"filters,omitempty"` + + // DefaultModel defines the fallback model if no modelRefs are available + // +optional + DefaultModel *ModelRef `json:"defaultModel,omitempty"` +} + +// Intent defines an intent category for routing +type Intent struct { + // Category defines the intent category name (e.g., "math", "computer science", "creative") + // +kubebuilder:validation:Required + // +kubebuilder:validation:MinLength=1 + // +kubebuilder:validation:MaxLength=100 + // +kubebuilder:validation:Pattern=^[a-zA-Z0-9\s\-_]+$ + Category string `json:"category"` + + // Description provides an optional description of this intent category + // +optional + // +kubebuilder:validation:MaxLength=500 + Description string `json:"description,omitempty"` + + // Threshold defines the confidence threshold for this intent (0.0-1.0) + // +optional + // +kubebuilder:validation:Minimum=0 + // +kubebuilder:validation:Maximum=1 + // +kubebuilder:default=0.7 + Threshold *float64 `json:"threshold,omitempty"` +} + +// ModelRef defines a reference to a model endpoint +type ModelRef struct { + // ModelName defines the name of the model + // +kubebuilder:validation:Required + // +kubebuilder:validation:MinLength=1 + // 
+kubebuilder:validation:MaxLength=100 + ModelName string `json:"modelName"` + + // Address defines the endpoint address + // +kubebuilder:validation:Required + // +kubebuilder:validation:MinLength=1 + // +kubebuilder:validation:MaxLength=255 + Address string `json:"address"` + + // Port defines the endpoint port + // +kubebuilder:validation:Required + // +kubebuilder:validation:Minimum=1 + // +kubebuilder:validation:Maximum=65535 + Port int32 `json:"port"` + + // Weight defines the traffic weight for this model (0-100) + // +optional + // +kubebuilder:validation:Minimum=0 + // +kubebuilder:validation:Maximum=100 + // +kubebuilder:default=100 + Weight *int32 `json:"weight,omitempty"` + + // Priority defines the priority of this model reference (higher values = higher priority) + // +optional + // +kubebuilder:validation:Minimum=0 + // +kubebuilder:validation:Maximum=1000 + Priority *int32 `json:"priority,omitempty"` +} + +// Filter defines a filter to be applied to requests +type Filter struct { + // Type defines the filter type + // +kubebuilder:validation:Required + // +kubebuilder:validation:Enum=PIIDetection;PromptGuard;SemanticCache;ReasoningControl + Type FilterType `json:"type"` + + // Config defines the filter-specific configuration + // +optional + Config *runtime.RawExtension `json:"config,omitempty"` + + // Enabled defines whether this filter is enabled + // +optional + // +kubebuilder:default=true + Enabled *bool `json:"enabled,omitempty"` +} + +// FilterType defines the supported filter types +// +kubebuilder:validation:Enum=PIIDetection;PromptGuard;SemanticCache;ReasoningControl;ToolSelection +type FilterType string + +const ( + // FilterTypePIIDetection enables PII detection and filtering + FilterTypePIIDetection FilterType = "PIIDetection" + // FilterTypePromptGuard enables prompt security and jailbreak detection + FilterTypePromptGuard FilterType = "PromptGuard" + // FilterTypeSemanticCache enables semantic caching for performance optimization + 
FilterTypeSemanticCache FilterType = "SemanticCache" + // FilterTypeReasoningControl enables reasoning mode control + FilterTypeReasoningControl FilterType = "ReasoningControl" + // FilterTypeToolSelection enables automatic tool selection based on semantic similarity + FilterTypeToolSelection FilterType = "ToolSelection" +) + +// SemanticRouteList contains a list of SemanticRoute +// +kubebuilder:object:root=true +type SemanticRouteList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []SemanticRoute `json:"items"` +} diff --git a/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/zz_generated.deepcopy.go b/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/zz_generated.deepcopy.go new file mode 100644 index 00000000..d4aab7df --- /dev/null +++ b/src/semantic-router/pkg/apis/vllm.ai/v1alpha1/zz_generated.deepcopy.go @@ -0,0 +1,477 @@ +//go:build !ignore_autogenerated + +/* +Copyright 2025 vLLM Semantic Router. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by controller-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" +) + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *Filter) DeepCopyInto(out *Filter) { + *out = *in + if in.Config != nil { + in, out := &in.Config, &out.Config + *out = new(runtime.RawExtension) + (*in).DeepCopyInto(*out) + } + if in.Enabled != nil { + in, out := &in.Enabled, &out.Enabled + *out = new(bool) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Filter. +func (in *Filter) DeepCopy() *Filter { + if in == nil { + return nil + } + out := new(Filter) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *FilterCondition) DeepCopyInto(out *FilterCondition) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FilterCondition. +func (in *FilterCondition) DeepCopy() *FilterCondition { + if in == nil { + return nil + } + out := new(FilterCondition) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *FilterConfigHelper) DeepCopyInto(out *FilterConfigHelper) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FilterConfigHelper. +func (in *FilterConfigHelper) DeepCopy() *FilterConfigHelper { + if in == nil { + return nil + } + out := new(FilterConfigHelper) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Intent) DeepCopyInto(out *Intent) { + *out = *in + if in.Threshold != nil { + in, out := &in.Threshold, &out.Threshold + *out = new(float64) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Intent. 
+func (in *Intent) DeepCopy() *Intent { + if in == nil { + return nil + } + out := new(Intent) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ModelRef) DeepCopyInto(out *ModelRef) { + *out = *in + if in.Weight != nil { + in, out := &in.Weight, &out.Weight + *out = new(int32) + **out = **in + } + if in.Priority != nil { + in, out := &in.Priority, &out.Priority + *out = new(int32) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelRef. +func (in *ModelRef) DeepCopy() *ModelRef { + if in == nil { + return nil + } + out := new(ModelRef) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PIIDetectionConfig) DeepCopyInto(out *PIIDetectionConfig) { + *out = *in + if in.AllowByDefault != nil { + in, out := &in.AllowByDefault, &out.AllowByDefault + *out = new(bool) + **out = **in + } + if in.PIITypesAllowed != nil { + in, out := &in.PIITypesAllowed, &out.PIITypesAllowed + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.Threshold != nil { + in, out := &in.Threshold, &out.Threshold + *out = new(float64) + **out = **in + } + if in.Action != nil { + in, out := &in.Action, &out.Action + *out = new(string) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PIIDetectionConfig. +func (in *PIIDetectionConfig) DeepCopy() *PIIDetectionConfig { + if in == nil { + return nil + } + out := new(PIIDetectionConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *PromptGuardConfig) DeepCopyInto(out *PromptGuardConfig) { + *out = *in + if in.Threshold != nil { + in, out := &in.Threshold, &out.Threshold + *out = new(float64) + **out = **in + } + if in.Action != nil { + in, out := &in.Action, &out.Action + *out = new(string) + **out = **in + } + if in.CustomRules != nil { + in, out := &in.CustomRules, &out.CustomRules + *out = make([]SecurityRule, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PromptGuardConfig. +func (in *PromptGuardConfig) DeepCopy() *PromptGuardConfig { + if in == nil { + return nil + } + out := new(PromptGuardConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ReasoningControlConfig) DeepCopyInto(out *ReasoningControlConfig) { + *out = *in + if in.ReasonFamily != nil { + in, out := &in.ReasonFamily, &out.ReasonFamily + *out = new(string) + **out = **in + } + if in.EnableReasoning != nil { + in, out := &in.EnableReasoning, &out.EnableReasoning + *out = new(bool) + **out = **in + } + if in.ReasoningEffort != nil { + in, out := &in.ReasoningEffort, &out.ReasoningEffort + *out = new(string) + **out = **in + } + if in.MaxReasoningSteps != nil { + in, out := &in.MaxReasoningSteps, &out.MaxReasoningSteps + *out = new(int32) + **out = **in + } + if in.ReasoningTimeout != nil { + in, out := &in.ReasoningTimeout, &out.ReasoningTimeout + *out = new(int32) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ReasoningControlConfig. +func (in *ReasoningControlConfig) DeepCopy() *ReasoningControlConfig { + if in == nil { + return nil + } + out := new(ReasoningControlConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *RouteRule) DeepCopyInto(out *RouteRule) { + *out = *in + if in.Intents != nil { + in, out := &in.Intents, &out.Intents + *out = make([]Intent, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.ModelRefs != nil { + in, out := &in.ModelRefs, &out.ModelRefs + *out = make([]ModelRef, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.Filters != nil { + in, out := &in.Filters, &out.Filters + *out = make([]Filter, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.DefaultModel != nil { + in, out := &in.DefaultModel, &out.DefaultModel + *out = new(ModelRef) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RouteRule. +func (in *RouteRule) DeepCopy() *RouteRule { + if in == nil { + return nil + } + out := new(RouteRule) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SecurityRule) DeepCopyInto(out *SecurityRule) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SecurityRule. +func (in *SecurityRule) DeepCopy() *SecurityRule { + if in == nil { + return nil + } + out := new(SecurityRule) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *SemanticCacheConfig) DeepCopyInto(out *SemanticCacheConfig) { + *out = *in + if in.SimilarityThreshold != nil { + in, out := &in.SimilarityThreshold, &out.SimilarityThreshold + *out = new(float64) + **out = **in + } + if in.MaxEntries != nil { + in, out := &in.MaxEntries, &out.MaxEntries + *out = new(int32) + **out = **in + } + if in.TTLSeconds != nil { + in, out := &in.TTLSeconds, &out.TTLSeconds + *out = new(int32) + **out = **in + } + if in.Backend != nil { + in, out := &in.Backend, &out.Backend + *out = new(string) + **out = **in + } + if in.BackendConfig != nil { + in, out := &in.BackendConfig, &out.BackendConfig + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SemanticCacheConfig. +func (in *SemanticCacheConfig) DeepCopy() *SemanticCacheConfig { + if in == nil { + return nil + } + out := new(SemanticCacheConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SemanticRoute) DeepCopyInto(out *SemanticRoute) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SemanticRoute. +func (in *SemanticRoute) DeepCopy() *SemanticRoute { + if in == nil { + return nil + } + out := new(SemanticRoute) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *SemanticRoute) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *SemanticRouteList) DeepCopyInto(out *SemanticRouteList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]SemanticRoute, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SemanticRouteList. +func (in *SemanticRouteList) DeepCopy() *SemanticRouteList { + if in == nil { + return nil + } + out := new(SemanticRouteList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *SemanticRouteList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SemanticRouteSpec) DeepCopyInto(out *SemanticRouteSpec) { + *out = *in + if in.Rules != nil { + in, out := &in.Rules, &out.Rules + *out = make([]RouteRule, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SemanticRouteSpec. +func (in *SemanticRouteSpec) DeepCopy() *SemanticRouteSpec { + if in == nil { + return nil + } + out := new(SemanticRouteSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SemanticRouteStatus) DeepCopyInto(out *SemanticRouteStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SemanticRouteStatus. 
+func (in *SemanticRouteStatus) DeepCopy() *SemanticRouteStatus { + if in == nil { + return nil + } + out := new(SemanticRouteStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ToolSelectionConfig) DeepCopyInto(out *ToolSelectionConfig) { + *out = *in + if in.TopK != nil { + in, out := &in.TopK, &out.TopK + *out = new(int32) + **out = **in + } + if in.SimilarityThreshold != nil { + in, out := &in.SimilarityThreshold, &out.SimilarityThreshold + *out = new(float64) + **out = **in + } + if in.ToolsDBPath != nil { + in, out := &in.ToolsDBPath, &out.ToolsDBPath + *out = new(string) + **out = **in + } + if in.FallbackToEmpty != nil { + in, out := &in.FallbackToEmpty, &out.FallbackToEmpty + *out = new(bool) + **out = **in + } + if in.Categories != nil { + in, out := &in.Categories, &out.Categories + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.Tags != nil { + in, out := &in.Tags, &out.Tags + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ToolSelectionConfig. +func (in *ToolSelectionConfig) DeepCopy() *ToolSelectionConfig { + if in == nil { + return nil + } + out := new(ToolSelectionConfig) + in.DeepCopyInto(out) + return out +} diff --git a/tools/make/golang.mk b/tools/make/golang.mk index 134c67e3..06441e20 100644 --- a/tools/make/golang.mk +++ b/tools/make/golang.mk @@ -26,3 +26,19 @@ check-go-mod-tidy: fi @echo "✅ src/semantic-router go mod tidy check passed" @echo "✅ All go mod tidy checks passed" + +# Controller-gen targets +install-controller-gen: + @echo "Installing controller-gen..." + @cd src/semantic-router && go install sigs.k8s.io/controller-tools/cmd/controller-gen@v0.19.0 + +generate-crd: install-controller-gen + @echo "Generating CRD manifests..."
+ @cd src/semantic-router && controller-gen crd:crdVersions=v1,allowDangerousTypes=true paths=./pkg/apis/vllm.ai/v1alpha1 output:crd:artifacts:config=../../deploy/kubernetes/crds + +generate-deepcopy: install-controller-gen + @echo "Generating deepcopy methods..." + @cd src/semantic-router && controller-gen object:headerFile=./hack/boilerplate.go.txt paths=./pkg/apis/vllm.ai/v1alpha1 + +generate-api: generate-deepcopy generate-crd + @echo "Generated all API artifacts"