diff --git a/.github/workflows/unit-test-e2e-testcases.yml b/.github/workflows/unit-test-e2e-testcases.yml
new file mode 100644
index 000000000..8a6776a25
--- /dev/null
+++ b/.github/workflows/unit-test-e2e-testcases.yml
@@ -0,0 +1,367 @@
+name: E2E Testcases Unit Tests
+
+on:
+  pull_request:
+    paths:
+      - "e2e-tests/testcases/**"
+      - "src/semantic-router/pkg/classification/**"
+      - "src/semantic-router/pkg/config/**"
+      - "candle-binding/**"
+      - ".github/workflows/unit-test-e2e-testcases.yml"
+  push:
+    branches:
+      - main
+  workflow_dispatch:
+
+env:
+  GO_VERSION: '1.24'
+  RUST_VERSION: '1.90.0'
+
+jobs:
+  test-keyword-routing:
+    name: Keyword Routing Tests
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: ${{ env.GO_VERSION }}
+          cache: true
+          cache-dependency-path: e2e-tests/testcases/go.sum
+
+      - name: Set up Rust
+        uses: actions-rust-lang/setup-rust-toolchain@v1
+        with:
+          toolchain: ${{ env.RUST_VERSION }}
+
+      - name: Cache Rust dependencies
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cargo/bin/
+            ~/.cargo/registry/index/
+            ~/.cargo/registry/cache/
+            ~/.cargo/git/db/
+            candle-binding/target/
+          key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-cargo-
+
+      - name: Build Rust Candle Bindings
+        run: |
+          cd candle-binding
+          cargo build --release --no-default-features
+          ls -la target/release/
+
+      - name: Verify Rust library
+        run: |
+          if [ -f "candle-binding/target/release/libcandle_semantic_router.so" ]; then
+            echo "✅ Rust library built successfully"
+            ls -lh candle-binding/target/release/libcandle_semantic_router.so
+          else
+            echo "❌ Rust library not found"
+            exit 1
+          fi
+
+      - name: Run Keyword Routing Tests
+        env:
+          LD_LIBRARY_PATH: ${{ github.workspace }}/candle-binding/target/release
+        run: |
+          cd e2e-tests/testcases
+          echo "Running keyword routing tests..."
+          go test -v -coverprofile=coverage-keyword.out -covermode=atomic -coverpkg=github.com/vllm-project/semantic-router/src/semantic-router/pkg/classification
+
+      - name: Generate coverage report
+        if: always()
+        run: |
+          cd e2e-tests/testcases
+          # The profile is absent when the build or tests failed before
+          # coverage was written; skip instead of failing a second time.
+          if [ ! -f coverage-keyword.out ]; then
+            echo "⚠️ coverage-keyword.out not found; skipping coverage report"
+            exit 0
+          fi
+
+          go tool cover -func=coverage-keyword.out > coverage-summary.txt
+          echo "=== Full Coverage Summary ==="
+          cat coverage-summary.txt
+
+          echo ""
+          echo "=== Keyword Classifier Coverage ==="
+          grep "keyword_classifier.go" coverage-summary.txt || echo "No keyword_classifier.go coverage found"
+
+          # Extract coverage for keyword_classifier.go only
+          # Filter lines containing keyword_classifier.go, extract percentage, calculate average
+          KEYWORD_COVERAGE=$(grep "keyword_classifier.go" coverage-summary.txt | awk '{gsub(/%/, "", $NF); sum+=$NF; count++} END {if(count>0) printf "%.1f", sum/count; else print "0.0"}')
+          echo "Keyword Classifier Average Coverage: ${KEYWORD_COVERAGE}%"
+          echo "COVERAGE=${KEYWORD_COVERAGE}%" >> "$GITHUB_ENV"
+
+      - name: Check coverage threshold
+        if: always()
+        run: |
+          # COVERAGE is only exported once a coverage profile was produced.
+          if [ -z "${COVERAGE:-}" ]; then
+            echo "⚠️ No coverage data available; skipping threshold check"
+            exit 0
+          fi
+
+          COVERAGE_PERCENT="${COVERAGE%\%}"
+          THRESHOLD=80
+
+          if (( $(echo "$COVERAGE_PERCENT < $THRESHOLD" | bc -l) )); then
+            echo "❌ Coverage $COVERAGE is below threshold ${THRESHOLD}%"
+            exit 1
+          else
+            echo "✅ Coverage $COVERAGE meets threshold ${THRESHOLD}%"
+          fi
+
+      - name: Upload coverage to Codecov
+        uses: codecov/codecov-action@v4
+        if: always()
+        with:
+          files: ./e2e-tests/testcases/coverage-keyword.out
+          flags: e2e-testcases-keyword
+          name: keyword-routing-coverage
+          fail_ci_if_error: false
+
+      - name: Test Summary
+        if: always()
+        run: |
+          {
+            echo "### Keyword Routing Test Results :test_tube:"
+            echo ""
+            echo "**Status:** ${{ job.status }}"
+            echo ""
+            echo "**Coverage:** ${COVERAGE:-n/a}"
+            echo ""
+            echo "#### Test Categories"
+            echo "- OR operator tests"
+            echo "- AND operator tests"
+            echo "- NOR operator tests"
+            echo "- Case sensitivity tests"
+            echo "- Word boundary tests"
+            echo "- Regex special character tests"
+            echo "- Edge case tests"
+            echo "- Multiple rule matching"
+            echo "- Confidence score validation"
+            echo "- JSON test data loading"
+            echo "- Error handling"
+          } >> "$GITHUB_STEP_SUMMARY"
+
+  test-embedding-routing:
+    name: Embedding Routing Tests
+    runs-on: ubuntu-latest
+    # Only does real work once the embedding tests exist (for future PRs).
+    # github.event.pull_request.changed_files is a file count, not a list of
+    # paths, so the test file is detected after checkout instead.
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Detect embedding routing tests
+        id: detect
+        run: |
+          if [ -f e2e-tests/testcases/embedding_routing_test.go ]; then
+            echo "present=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "present=false" >> "$GITHUB_OUTPUT"
+            echo "⏭️ Embedding routing tests not ready yet (skipped)"
+          fi
+
+      - name: Set up Go
+        if: steps.detect.outputs.present == 'true'
+        uses: actions/setup-go@v5
+        with:
+          go-version: ${{ env.GO_VERSION }}
+          cache: true
+          cache-dependency-path: e2e-tests/testcases/go.sum
+
+      - name: Set up Rust
+        if: steps.detect.outputs.present == 'true'
+        uses: actions-rust-lang/setup-rust-toolchain@v1
+        with:
+          toolchain: ${{ env.RUST_VERSION }}
+
+      - name: Build Rust Candle Bindings
+        if: steps.detect.outputs.present == 'true'
+        run: |
+          cd candle-binding
+          cargo build --release --no-default-features
+
+      - name: Run Embedding Routing Tests
+        if: steps.detect.outputs.present == 'true'
+        env:
+          LD_LIBRARY_PATH: ${{ github.workspace }}/candle-binding/target/release
+        run: |
+          cd e2e-tests/testcases
+          echo "Running embedding routing tests..."
+          # `go test -run` only matches Test* function names; Ginkgo
+          # containers are selected with -ginkgo.focus. Assumes the specs
+          # live under Describe("Embedding Routing", ...) - TODO confirm.
+          go test -v -ginkgo.focus="Embedding Routing" -coverprofile=coverage-embedding.out -covermode=atomic
+
+  test-hybrid-routing:
+    name: Hybrid Routing Tests
+    runs-on: ubuntu-latest
+    # Only does real work once the hybrid tests exist (for future PRs).
+    # github.event.pull_request.changed_files is a file count, not a list of
+    # paths, so the test file is detected after checkout instead.
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Detect hybrid routing tests
+        id: detect
+        run: |
+          if [ -f e2e-tests/testcases/hybrid_routing_test.go ]; then
+            echo "present=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "present=false" >> "$GITHUB_OUTPUT"
+            echo "⏭️ Hybrid routing tests not ready yet (skipped)"
+          fi
+
+      - name: Set up Go
+        if: steps.detect.outputs.present == 'true'
+        uses: actions/setup-go@v5
+        with:
+          go-version: ${{ env.GO_VERSION }}
+          cache: true
+          cache-dependency-path: e2e-tests/testcases/go.sum
+
+      - name: Set up Rust
+        if: steps.detect.outputs.present == 'true'
+        uses: actions-rust-lang/setup-rust-toolchain@v1
+        with:
+          toolchain: ${{ env.RUST_VERSION }}
+
+      - name: Build Rust Candle Bindings
+        if: steps.detect.outputs.present == 'true'
+        run: |
+          cd candle-binding
+          cargo build --release --no-default-features
+
+      - name: Run Hybrid Routing Tests
+        if: steps.detect.outputs.present == 'true'
+        env:
+          LD_LIBRARY_PATH: ${{ github.workspace }}/candle-binding/target/release
+        run: |
+          cd e2e-tests/testcases
+          echo "Running hybrid routing tests..."
+          # `go test -run` only matches Test* function names; Ginkgo
+          # containers are selected with -ginkgo.focus. Assumes the specs
+          # live under Describe("Hybrid Routing", ...) - TODO confirm.
+          go test -v -ginkgo.focus="Hybrid Routing" -coverprofile=coverage-hybrid.out -covermode=atomic
+
+  race-detection:
+    name: Race Condition Detection
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: ${{ env.GO_VERSION }}
+          cache: true
+          cache-dependency-path: e2e-tests/testcases/go.sum
+
+      - name: Set up Rust
+        uses: actions-rust-lang/setup-rust-toolchain@v1
+        with:
+          toolchain: ${{ env.RUST_VERSION }}
+
+      - name: Build Rust Candle Bindings
+        run: |
+          cd candle-binding
+          cargo build --release --no-default-features
+
+      - name: Run tests with race detector
+        env:
+          LD_LIBRARY_PATH: ${{ github.workspace }}/candle-binding/target/release
+        run: |
+          cd e2e-tests/testcases
+          echo "Running tests with race detector..."
+          go test -race -v || {
+            echo "❌ Race conditions detected!"
+            exit 1
+          }
+          echo "✅ No race conditions detected"
+
+  lint:
+    name: Lint Go Code
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: ${{ env.GO_VERSION }}
+          cache: true
+          cache-dependency-path: e2e-tests/testcases/go.sum
+
+      - name: Run golangci-lint
+        uses: golangci/golangci-lint-action@v6
+        with:
+          version: latest
+          working-directory: e2e-tests/testcases
+          args: --timeout=5m
+
+  summary:
+    name: Test Summary
+    if: always()
+    runs-on: ubuntu-latest
+    needs: [test-keyword-routing, race-detection, lint]
+
+    steps:
+      - name: Check test results
+        run: |
+          echo "=== E2E Testcases Summary ==="
+          echo "Keyword Routing Tests: ${{ needs.test-keyword-routing.result }}"
+          echo "Race Detection: ${{ needs.race-detection.result }}"
+          echo "Lint: ${{ needs.lint.result }}"
+
+          # Count every job that did not succeed; a cancelled or skipped
+          # dependency must not be reported as a pass.
+          FAILURES=0
+          if [[ "${{ needs.test-keyword-routing.result }}" != "success" ]]; then
+            echo "❌ Keyword routing tests failed"
+            FAILURES=$((FAILURES + 1))
+          fi
+          if [[ "${{ needs.race-detection.result }}" != "success" ]]; then
+            echo "❌ Race detection failed"
+            FAILURES=$((FAILURES + 1))
+          fi
+          if [[ "${{ needs.lint.result }}" != "success" ]]; then
+            echo "❌ Lint failed"
+            FAILURES=$((FAILURES + 1))
+          fi
+
+          echo ""
+          echo "=== Scenarios Covered (Issue #667) ==="
+          echo "- OR operator - any keyword matches"
+          echo "- AND operator - all keywords must match"
+          echo "- NOR operator - no keywords match"
+          echo "- Case-sensitive vs case-insensitive matching"
+          echo "- Regex pattern matching"
+          echo "- Word boundary detection"
+          echo "- Priority over embedding and intent-based routing"
+
+          if [ "$FAILURES" -gt 0 ]; then
+            echo ""
+            echo "❌ $FAILURES test(s) failed. Check the logs for details."
+            exit 1
+          else
+            echo ""
+            echo "✅ All E2E testcases passed!"
+ fi diff --git a/e2e-tests/testcases/go.mod b/e2e-tests/testcases/go.mod new file mode 100644 index 000000000..3f6d8736b --- /dev/null +++ b/e2e-tests/testcases/go.mod @@ -0,0 +1,48 @@ +module github.com/vllm-project/semantic-router/e2e-tests/testcases + +go 1.24.1 + +require ( + github.com/onsi/ginkgo/v2 v2.23.4 + github.com/onsi/gomega v1.38.0 + github.com/vllm-project/semantic-router/src/semantic-router v0.0.0 +) + +require ( + github.com/bahlo/generic-list-go v0.2.0 // indirect + github.com/beorn7/perks v1.0.1 // indirect + github.com/buger/jsonparser v1.1.1 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-task/slim-sprig/v3 v3.0.0 // indirect + github.com/google/go-cmp v0.7.0 // indirect + github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/invopop/jsonschema v0.13.0 // indirect + github.com/mailru/easyjson v0.7.7 // indirect + github.com/mark3labs/mcp-go v0.42.0-beta.1 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/prometheus/client_golang v1.23.0 // indirect + github.com/prometheus/client_model v0.6.2 // indirect + github.com/prometheus/common v0.65.0 // indirect + github.com/prometheus/procfs v0.16.1 // indirect + github.com/spf13/cast v1.7.1 // indirect + github.com/vllm-project/semantic-router/candle-binding v0.0.0-00010101000000-000000000000 // indirect + github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect + github.com/yosida95/uritemplate/v3 v3.0.2 // indirect + go.uber.org/automaxprocs v1.6.0 // indirect + go.uber.org/multierr v1.11.0 // indirect + go.uber.org/zap v1.27.0 // indirect + golang.org/x/net v0.43.0 // indirect + golang.org/x/sys v0.37.0 // indirect + golang.org/x/text v0.28.0 // indirect + golang.org/x/tools v0.35.0 // indirect + google.golang.org/protobuf v1.36.9 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect + gopkg.in/yaml.v3 
v3.0.1 // indirect +) + +replace ( + github.com/vllm-project/semantic-router/candle-binding => ../../candle-binding + github.com/vllm-project/semantic-router/src/semantic-router => ../../src/semantic-router +) diff --git a/e2e-tests/testcases/go.sum b/e2e-tests/testcases/go.sum new file mode 100644 index 000000000..60e1c796d --- /dev/null +++ b/e2e-tests/testcases/go.sum @@ -0,0 +1,88 @@ +github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk= +github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs= +github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= +github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod 
h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8= +github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/invopop/jsonschema v0.13.0 h1:KvpoAJWEjR3uD9Kbm2HWJmqsEaHt8lBUpd0qHcIi21E= +github.com/invopop/jsonschema v0.13.0/go.mod h1:ffZ5Km5SWWRAIN6wbDXItl95euhFz2uON45H2qjYt+0= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/mark3labs/mcp-go v0.42.0-beta.1 h1:jXCUOg7vHwSuknzy4hPvOXASnzmLluM3AMx1rPh/OYM= +github.com/mark3labs/mcp-go v0.42.0-beta.1/go.mod h1:T7tUa2jO6MavG+3P25Oy/jR7iCeJPHImCZHRymCn39g= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/onsi/ginkgo/v2 v2.23.4 h1:ktYTpKJAVZnDT4VjxSbiBenUjmlL/5QkBEocaWXiQus= +github.com/onsi/ginkgo/v2 v2.23.4/go.mod h1:Bt66ApGPBFzHyR+JO10Zbt0Gsp4uWxu5mIOTusL46e8= +github.com/onsi/gomega v1.38.0 h1:c/WX+w8SLAinvuKKQFh77WEucCnPk4j2OTUr7lt7BeY= +github.com/onsi/gomega v1.38.0/go.mod h1:OcXcwId0b9QsE7Y49u+BTrL4IdKOBOKnD6VQNTJEB6o= +github.com/pmezard/go-difflib 
v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g= +github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U= +github.com/prometheus/client_golang v1.23.0 h1:ust4zpdl9r4trLY/gSjlm07PuiBq2ynaXXlptpfy8Uc= +github.com/prometheus/client_golang v1.23.0/go.mod h1:i/o0R9ByOnHX0McrTMTyhYvKE4haaf2mW08I+jGAjEE= +github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= +github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= +github.com/prometheus/common v0.65.0 h1:QDwzd+G1twt//Kwj/Ww6E9FQq1iVMmODnILtW1t2VzE= +github.com/prometheus/common v0.65.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8= +github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= +github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= +github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= +github.com/samber/lo v1.52.0 h1:Rvi+3BFHES3A8meP33VPAxiBZX/Aws5RxrschYGjomw= +github.com/samber/lo v1.52.0/go.mod h1:4+MXEGsJzbKGaUEQFKBq2xtfuznW9oz/WrgyzMzRoM0= +github.com/spf13/cast v1.7.1 h1:cuNEagBQEHWN1FnbGEjCXL2szYEXqfJPbP2HNUaca9Y= +github.com/spf13/cast v1.7.1/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc= +github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw= 
+github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4= +github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4= +go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs= +go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= +golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= +golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ= +golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng= +golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU= +golang.org/x/tools v0.35.0 h1:mBffYraMEf7aa0sB+NuKnuCy8qI/9Bughn8dC2Gu5r0= +golang.org/x/tools v0.35.0/go.mod h1:NKdj5HkL/73byiZSJjqJgKn3ep7KjFkBOkR/Hps3VPw= +google.golang.org/protobuf v1.36.9 h1:w2gp2mA27hUeUzj9Ex9FBjsBm40zfaDtEWow293U7Iw= +google.golang.org/protobuf v1.36.9/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/yaml.v2 v2.4.0 
h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
+gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/e2e-tests/testcases/helpers.go b/e2e-tests/testcases/helpers.go
new file mode 100644
index 000000000..02acc3658
--- /dev/null
+++ b/e2e-tests/testcases/helpers.go
@@ -0,0 +1,168 @@
+package testcases
+
+import (
+	"encoding/json"
+	"os"
+
+	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/classification"
+	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
+)
+
+// KeywordTestCase represents a test case for keyword routing
+type KeywordTestCase struct {
+	Name               string   `json:"name"`
+	Description        string   `json:"description"`
+	Query              string   `json:"query"`
+	ExpectedCategory   string   `json:"expected_category"`
+	ExpectedConfidence float64  `json:"expected_confidence"`
+	MatchedKeywords    []string `json:"matched_keywords"`
+}
+
+// EmbeddingTestCase represents a test case for embedding-based routing
+type EmbeddingTestCase struct {
+	Name              string  `json:"name"`
+	Description       string  `json:"description"`
+	Query             string  `json:"query"`
+	ExpectedCategory  string  `json:"expected_category"`
+	MinSimilarity     float64 `json:"min_similarity"`
+	AggregationMethod string  `json:"aggregation_method"`
+	ModelType         string  `json:"model_type"`
+}
+
+// HybridTestCase represents a test case for hybrid routing (priority testing)
+type HybridTestCase struct {
+	Name                  string  `json:"name"`
+	Description           string  `json:"description"`
+	Query                 string  `json:"query"`
+	ExpectedCategory      string  `json:"expected_category"`
+	ExpectedRoutingMethod string  `json:"expected_routing_method"` // "keyword", "embedding", "mcp"
+	ExpectedConfidence    float64 `json:"expected_confidence"`
+}
+
+// EntropyTestCase represents a test case for entropy-based routing
+type EntropyTestCase struct {
+	Name              string  `json:"name"`
+	Description       string  `json:"description"`
+	Query             string  `json:"query"`
+	ExpectedEntropy   float64 `json:"expected_entropy"`
+	ExpectedReasoning bool    `json:"expected_reasoning"`
+	EntropyThreshold  float64 `json:"entropy_threshold"`
+}
+
+// ReasoningControlTestCase represents a test case for reasoning control
+type ReasoningControlTestCase struct {
+	Name              string `json:"name"`
+	Description       string `json:"description"`
+	Query             string `json:"query"`
+	Category          string `json:"category"`
+	ExpectedReasoning bool   `json:"expected_reasoning"`
+	EffortLevel       string `json:"effort_level"`
+	ModelFamily       string `json:"model_family"`
+}
+
+// ToolSelectionTestCase represents a test case for tool selection
+type ToolSelectionTestCase struct {
+	Name                string   `json:"name"`
+	Description         string   `json:"description"`
+	Query               string   `json:"query"`
+	ExpectedTools       []string `json:"expected_tools"`
+	TopK                int      `json:"top_k"`
+	SimilarityThreshold float64  `json:"similarity_threshold"`
+}
+
+// loadJSONFile reads the file at path and decodes its JSON content into out,
+// which must be a pointer. It returns the read or decode error unchanged.
+func loadJSONFile(path string, out interface{}) error {
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return err
+	}
+	return json.Unmarshal(data, out)
+}
+
+// LoadKeywordTestCases loads keyword test cases from a JSON file.
+// On a read or decode error it returns a nil slice and the error.
+func LoadKeywordTestCases(path string) ([]KeywordTestCase, error) {
+	var cases []KeywordTestCase
+	if err := loadJSONFile(path, &cases); err != nil {
+		return nil, err
+	}
+	return cases, nil
+}
+
+// LoadEmbeddingTestCases loads embedding test cases from a JSON file.
+// On a read or decode error it returns a nil slice and the error.
+func LoadEmbeddingTestCases(path string) ([]EmbeddingTestCase, error) {
+	var cases []EmbeddingTestCase
+	if err := loadJSONFile(path, &cases); err != nil {
+		return nil, err
+	}
+	return cases, nil
+}
+
+// LoadHybridTestCases loads hybrid routing test cases from a JSON file.
+// On a read or decode error it returns a nil slice and the error.
+func LoadHybridTestCases(path string) ([]HybridTestCase, error) {
+	var cases []HybridTestCase
+	if err := loadJSONFile(path, &cases); err != nil {
+		return nil, err
+	}
+	return cases, nil
+}
+
+// CreateKeywordTestRules creates standard keyword rules for testing
+// Note: Rules are evaluated in order. NOR rule is last to avoid matching everything.
+func CreateKeywordTestRules() []config.KeywordRule {
+	return []config.KeywordRule{
+		{
+			Name:          "urgent_request",
+			Operator:      "OR",
+			Keywords:      []string{"urgent", "immediate", "asap", "emergency"},
+			CaseSensitive: false,
+		},
+		{
+			Name:          "sensitive_data",
+			Operator:      "AND",
+			Keywords:      []string{"SSN", "credit card"},
+			CaseSensitive: false,
+		},
+		{
+			Name:          "case_sensitive_test",
+			Operator:      "OR",
+			Keywords:      []string{"SECRET"},
+			CaseSensitive: true,
+		},
+		{
+			Name:          "secret_detection",
+			Operator:      "OR",
+			Keywords:      []string{"secret"},
+			CaseSensitive: false,
+		},
+		{
+			Name:          "version_check",
+			Operator:      "OR",
+			Keywords:      []string{"1.0", "2.0", "3.0"},
+			CaseSensitive: false,
+		},
+		{
+			Name:          "wildcard_test",
+			Operator:      "OR",
+			Keywords:      []string{"*"},
+			CaseSensitive: false,
+		},
+		// NOR rule at end - matches when NO spam keywords present
+		// This will match most text, so it's placed last
+		{
+			Name:          "spam",
+			Operator:      "NOR",
+			Keywords:      []string{"buy now", "free money", "click here"},
+			CaseSensitive: false,
+		},
+	}
+}
+
+// CreateTestKeywordClassifier creates a keyword classifier instance for testing
+// by passing rules straight to classification.NewKeywordClassifier.
+func CreateTestKeywordClassifier(rules []config.KeywordRule) (*classification.KeywordClassifier, error) {
+	return classification.NewKeywordClassifier(rules)
+}
diff --git a/e2e-tests/testcases/keyword_routing_test.go b/e2e-tests/testcases/keyword_routing_test.go
new file mode 100644
index 000000000..29e97b4fc
--- /dev/null
+++ b/e2e-tests/testcases/keyword_routing_test.go
@@ -0,0 +1,505 @@
+package testcases
+
+import (
+	. "github.com/onsi/ginkgo/v2"
+	.
"github.com/onsi/gomega" + + "github.com/vllm-project/semantic-router/src/semantic-router/pkg/classification" + "github.com/vllm-project/semantic-router/src/semantic-router/pkg/config" +) + +var _ = Describe("Keyword Routing", func() { + var ( + classifier *classification.KeywordClassifier + rules []config.KeywordRule + rulesWithoutNOR []config.KeywordRule + ) + + BeforeEach(func() { + // Get all rules including NOR + allRules := CreateKeywordTestRules() + + // Create version without NOR for tests that expect empty results + rulesWithoutNOR = []config.KeywordRule{} + for _, rule := range allRules { + if rule.Operator != "NOR" { + rulesWithoutNOR = append(rulesWithoutNOR, rule) + } + } + + // By default, use rules without NOR + // Tests that specifically test NOR will create their own classifier + rules = rulesWithoutNOR + var err error + classifier, err = CreateTestKeywordClassifier(rules) + Expect(err).NotTo(HaveOccurred()) + Expect(classifier).NotTo(BeNil()) + }) + + Context("OR Operator", func() { + It("should match when any keyword is present", func() { + testCases := []struct { + query string + expectedCategory string + }{ + {"I need urgent help", "urgent_request"}, + {"This is an immediate issue", "urgent_request"}, + {"Please respond asap", "urgent_request"}, + {"This is an emergency situation", "urgent_request"}, + } + + for _, tc := range testCases { + category, confidence, err := classifier.Classify(tc.query) + Expect(err).NotTo(HaveOccurred(), "Query: %s", tc.query) + Expect(category).To(Equal(tc.expectedCategory), + "Query '%s' should match category %s", tc.query, tc.expectedCategory) + Expect(confidence).To(Equal(1.0), "Keyword matches should have 100%% confidence") + } + }) + + It("should not match when no keywords are present", func() { + category, _, err := classifier.Classify("Just a normal query") + Expect(err).NotTo(HaveOccurred()) + Expect(category).To(BeEmpty()) + }) + + It("should be case-insensitive when configured", func() { + testCases := 
[]string{ + "This is URGENT", + "This is Urgent", + "This is urgent", + "This is UrGeNt", + } + + for _, query := range testCases { + category, _, err := classifier.Classify(query) + Expect(err).NotTo(HaveOccurred()) + Expect(category).To(Equal("urgent_request"), + "Query '%s' should match case-insensitively", query) + } + }) + + It("should match keyword at beginning of text", func() { + category, _, err := classifier.Classify("Urgent: please help") + Expect(err).NotTo(HaveOccurred()) + Expect(category).To(Equal("urgent_request")) + }) + + It("should match keyword at end of text", func() { + category, _, err := classifier.Classify("Please help, this is urgent") + Expect(err).NotTo(HaveOccurred()) + Expect(category).To(Equal("urgent_request")) + }) + + It("should match keyword in middle of text", func() { + category, _, err := classifier.Classify("This is an urgent matter that needs attention") + Expect(err).NotTo(HaveOccurred()) + Expect(category).To(Equal("urgent_request")) + }) + }) + + Context("AND Operator", func() { + It("should match when all keywords are present", func() { + query := "My SSN and credit card were stolen" + category, confidence, err := classifier.Classify(query) + + Expect(err).NotTo(HaveOccurred()) + Expect(category).To(Equal("sensitive_data")) + Expect(confidence).To(Equal(1.0)) + }) + + It("should not match when only some keywords are present", func() { + queries := []string{ + "My SSN was stolen", // Only SSN + "My credit card was stolen", // Only credit card + "Something else entirely", // Neither + } + + for _, query := range queries { + category, _, err := classifier.Classify(query) + Expect(err).NotTo(HaveOccurred()) + Expect(category).NotTo(Equal("sensitive_data"), + "Query '%s' should not match AND rule", query) + } + }) + + It("should match regardless of keyword order", func() { + queries := []string{ + "My SSN and credit card", + "My credit card and SSN", + "SSN credit card stolen", + "credit card and SSN compromised", + } + + for 
_, query := range queries { + category, _, err := classifier.Classify(query) + Expect(err).NotTo(HaveOccurred()) + Expect(category).To(Equal("sensitive_data")) + } + }) + + It("should match with keywords far apart in text", func() { + query := "My SSN was compromised yesterday, and today I noticed my credit card was also affected" + category, _, err := classifier.Classify(query) + Expect(err).NotTo(HaveOccurred()) + Expect(category).To(Equal("sensitive_data")) + }) + + It("should match with repeated keywords", func() { + query := "SSN SSN credit card credit card" + category, _, err := classifier.Classify(query) + Expect(err).NotTo(HaveOccurred()) + Expect(category).To(Equal("sensitive_data")) + }) + }) + + Context("NOR Operator", func() { + var norClassifier *classification.KeywordClassifier + + BeforeEach(func() { + // Create classifier with ALL rules including NOR for these tests + allRules := CreateKeywordTestRules() + var err error + norClassifier, err = CreateTestKeywordClassifier(allRules) + Expect(err).NotTo(HaveOccurred()) + Expect(norClassifier).NotTo(BeNil()) + }) + + It("should match spam when no forbidden keywords are present", func() { + // NOR matches when NONE of the keywords are found + queries := []string{ + "How do I reset my password?", + "What is the capital of France?", + "Can you help me with my account?", + } + + for _, query := range queries { + category, confidence, err := norClassifier.Classify(query) + Expect(err).NotTo(HaveOccurred()) + Expect(category).To(Equal("spam"), + "Query '%s' should match spam via NOR (no spam keywords present)", query) + Expect(confidence).To(Equal(1.0)) + } + }) + + It("should not match spam when any forbidden keyword is present", func() { + // NOR does NOT match when any keyword is found + queries := []string{ + "Buy now and save!", + "Click here for free money", + "Free money available now", + "Buy now, click here for free money", + } + + for _, query := range queries { + category, _, err := 
norClassifier.Classify(query) + Expect(err).NotTo(HaveOccurred()) + Expect(category).NotTo(Equal("spam"), + "Query '%s' should NOT match spam via NOR (spam keywords present)", query) + } + }) + }) + + Context("Case Sensitivity", func() { + It("should match exact case when case-sensitive enabled", func() { + category, _, err := classifier.Classify("This is SECRET") + Expect(err).NotTo(HaveOccurred()) + Expect(category).To(Equal("case_sensitive_test")) + }) + + It("should not match different case when case-sensitive enabled", func() { + queries := []string{ + "This is secret", + "This is Secret", + "This is sEcReT", + "This is seCRet", + } + + for _, query := range queries { + category, _, err := classifier.Classify(query) + Expect(err).NotTo(HaveOccurred()) + Expect(category).NotTo(Equal("case_sensitive_test"), + "Query '%s' should not match case-sensitive rule", query) + } + }) + + It("should handle case-insensitive rules correctly", func() { + // secret_detection has case_sensitive: false + // Use lowercase to avoid matching case_sensitive_test first + queries := []string{ + "This is secret", + "This is Secret", + "This is sEcReT", + } + + for _, query := range queries { + category, _, err := classifier.Classify(query) + Expect(err).NotTo(HaveOccurred()) + Expect(category).To(Equal("secret_detection"), + "Query '%s' should match case-insensitive secret_detection", query) + } + }) + }) + + Context("Word Boundaries", func() { + It("should respect word boundaries - positive case", func() { + queries := []string{ + "This is a secret", + "The secret is safe", + "secret meeting", + "A secret!", + } + + for _, query := range queries { + category, _, err := classifier.Classify(query) + Expect(err).NotTo(HaveOccurred()) + Expect(category).To(Equal("secret_detection"), + "Query '%s' should match secret as whole word", query) + } + }) + + It("should respect word boundaries - negative case", func() { + queries := []string{ + "Talk to my secretary", + "The secretariat is 
here", + "Secretive behavior", + } + + for _, query := range queries { + category, _, err := classifier.Classify(query) + Expect(err).NotTo(HaveOccurred()) + Expect(category).NotTo(Equal("secret_detection"), + "Query '%s' should not match secret in partial word", query) + } + }) + + It("should handle word boundaries with punctuation", func() { + queries := []string{ + "secret.", + "secret!", + "secret?", + "secret,", + "(secret)", + "\"secret\"", + } + + for _, query := range queries { + category, _, err := classifier.Classify(query) + Expect(err).NotTo(HaveOccurred()) + Expect(category).To(Equal("secret_detection"), + "Query '%s' should match secret with punctuation", query) + } + }) + }) + + Context("Regex Special Characters", func() { + It("should handle dots literally", func() { + queries := []string{ + "Version 1.0 released", + "Using 2.0 now", + "3.0 is coming", + } + + for _, query := range queries { + category, _, err := classifier.Classify(query) + Expect(err).NotTo(HaveOccurred()) + Expect(category).To(Equal("version_check"), + "Query '%s' should match version with literal dot", query) + } + }) + + It("should not match dots as wildcard", func() { + // 1.0 should match literally, not 1X0 + category, _, err := classifier.Classify("Version 1X0") + Expect(err).NotTo(HaveOccurred()) + Expect(category).NotTo(Equal("version_check")) + }) + + It("should handle asterisks literally", func() { + queries := []string{ + "The symbol * is here", + "Use * wildcard", + "asterisk *", + } + + for _, query := range queries { + category, _, err := classifier.Classify(query) + Expect(err).NotTo(HaveOccurred()) + Expect(category).To(Equal("wildcard_test"), + "Query '%s' should match asterisk literally", query) + } + }) + }) + + Context("Edge Cases", func() { + It("should handle empty text", func() { + category, _, err := classifier.Classify("") + Expect(err).NotTo(HaveOccurred()) + Expect(category).To(BeEmpty()) + }) + + It("should handle whitespace-only text", func() { + 
queries := []string{ + " ", + "\t\t", + "\n\n", + " \t\n ", + } + + for _, query := range queries { + category, _, err := classifier.Classify(query) + Expect(err).NotTo(HaveOccurred()) + Expect(category).To(BeEmpty()) + } + }) + + It("should handle very long text", func() { + longPrefix := "This is normal text that goes on and on. " + longSuffix := "More normal text. " + var longText string + for i := 0; i < 100; i++ { + longText += longPrefix + } + longText += "urgent " + for i := 0; i < 100; i++ { + longText += longSuffix + } + + category, _, err := classifier.Classify(longText) + Expect(err).NotTo(HaveOccurred()) + Expect(category).To(Equal("urgent_request")) + }) + + It("should handle Unicode characters", func() { + queries := []string{ + "需要 urgent 帮助", + "緊急 urgent 事項", + "срочно urgent помощь", + } + + for _, query := range queries { + category, _, err := classifier.Classify(query) + Expect(err).NotTo(HaveOccurred()) + Expect(category).To(Equal("urgent_request"), + "Query '%s' should match with Unicode", query) + } + }) + + It("should handle emoji", func() { + queries := []string{ + "🚨 urgent 🚨", + "😱 urgent help 😱", + "⚠️ urgent ⚠️", + } + + for _, query := range queries { + category, _, err := classifier.Classify(query) + Expect(err).NotTo(HaveOccurred()) + Expect(category).To(Equal("urgent_request"), + "Query '%s' should match with emoji", query) + } + }) + + It("should handle newlines in text", func() { + query := "This is\nurgent\nhelp" + category, _, err := classifier.Classify(query) + Expect(err).NotTo(HaveOccurred()) + Expect(category).To(Equal("urgent_request")) + }) + + It("should handle tabs in text", func() { + query := "This is\turgent\thelp" + category, _, err := classifier.Classify(query) + Expect(err).NotTo(HaveOccurred()) + Expect(category).To(Equal("urgent_request")) + }) + }) + + Context("Multiple Rule Matching", func() { + It("should use first matching rule when multiple rules match", func() { + // Add overlapping rules + overlappingRules 
:= []config.KeywordRule{ + {Name: "rule1", Operator: "OR", Keywords: []string{"urgent"}, CaseSensitive: false}, + {Name: "rule2", Operator: "OR", Keywords: []string{"urgent"}, CaseSensitive: false}, + } + newClassifier, err := CreateTestKeywordClassifier(overlappingRules) + Expect(err).NotTo(HaveOccurred()) + + category, _, err := newClassifier.Classify("urgent request") + Expect(err).NotTo(HaveOccurred()) + Expect(category).To(Equal("rule1"), "Should match first rule") + }) + + It("should handle multiple different keywords matching", func() { + query := "This is urgent and also an emergency" + category, _, err := classifier.Classify(query) + Expect(err).NotTo(HaveOccurred()) + Expect(category).To(Equal("urgent_request")) + }) + }) + + Context("Confidence Scores", func() { + It("should always return confidence 1.0 for keyword matches", func() { + testCases := []string{ + "urgent", + "This is urgent", + "URGENT", + "My SSN and credit card", + } + + for _, query := range testCases { + _, confidence, err := classifier.Classify(query) + Expect(err).NotTo(HaveOccurred()) + Expect(confidence).To(Equal(1.0), + "Query '%s' should have confidence 1.0", query) + } + }) + }) + + Context("Loading from JSON test data", func() { + It("should pass all test cases from JSON file", func() { + testCases, err := LoadKeywordTestCases("testdata/keyword_routing_cases.json") + if err != nil { + Skip("Test data file not found: " + err.Error()) + return + } + + for _, tc := range testCases { + category, confidence, err := classifier.Classify(tc.Query) + Expect(err).NotTo(HaveOccurred(), "Test: %s - %s", tc.Name, tc.Description) + + if tc.ExpectedCategory != "" { + Expect(category).To(Equal(tc.ExpectedCategory), + "Test: %s - Query: %s", tc.Name, tc.Query) + } + + if tc.ExpectedConfidence > 0 { + Expect(confidence).To(Equal(tc.ExpectedConfidence), + "Test: %s - Query: %s", tc.Name, tc.Query) + } + } + }) + }) + + Context("Error Handling", func() { + It("should handle invalid operator 
gracefully", func() { + invalidRules := []config.KeywordRule{ + {Name: "invalid", Operator: "INVALID", Keywords: []string{"test"}, CaseSensitive: false}, + } + _, err := CreateTestKeywordClassifier(invalidRules) + Expect(err).To(HaveOccurred()) + }) + + It("should handle empty keywords array", func() { + emptyRules := []config.KeywordRule{ + {Name: "empty", Operator: "OR", Keywords: []string{}, CaseSensitive: false}, + } + newClassifier, err := CreateTestKeywordClassifier(emptyRules) + Expect(err).NotTo(HaveOccurred()) + + category, _, err := newClassifier.Classify("any text") + Expect(err).NotTo(HaveOccurred()) + Expect(category).To(BeEmpty()) + }) + }) +}) diff --git a/e2e-tests/testcases/suite_test.go b/e2e-tests/testcases/suite_test.go new file mode 100644 index 000000000..086554309 --- /dev/null +++ b/e2e-tests/testcases/suite_test.go @@ -0,0 +1,25 @@ +package testcases + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +// TestE2ETestcases is the entry point for the Ginkgo test suite +func TestE2ETestcases(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "E2E Testcases Suite") +} + +// Suite-level setup +var _ = BeforeSuite(func() { + // Initialize any suite-level resources + // e.g., models, databases, etc. 
+}) + +// Suite-level cleanup +var _ = AfterSuite(func() { + // Cleanup suite-level resources +}) diff --git a/e2e-tests/testcases/testdata/keyword_routing_cases.json b/e2e-tests/testcases/testdata/keyword_routing_cases.json new file mode 100644 index 000000000..c28a4a8cb --- /dev/null +++ b/e2e-tests/testcases/testdata/keyword_routing_cases.json @@ -0,0 +1,210 @@ +[ + { + "name": "OR operator - single keyword urgent", + "description": "Test OR operator with 'urgent' keyword", + "query": "I need urgent help with my account", + "expected_category": "urgent_request", + "expected_confidence": 1.0, + "matched_keywords": ["urgent"] + }, + { + "name": "OR operator - immediate keyword", + "description": "Test OR operator with 'immediate' keyword", + "query": "This requires immediate attention", + "expected_category": "urgent_request", + "expected_confidence": 1.0, + "matched_keywords": ["immediate"] + }, + { + "name": "OR operator - asap keyword", + "description": "Test OR operator with 'asap' keyword", + "query": "Please respond asap to this request", + "expected_category": "urgent_request", + "expected_confidence": 1.0, + "matched_keywords": ["asap"] + }, + { + "name": "OR operator - emergency keyword", + "description": "Test OR operator with 'emergency' keyword", + "query": "This is an emergency situation", + "expected_category": "urgent_request", + "expected_confidence": 1.0, + "matched_keywords": ["emergency"] + }, + { + "name": "AND operator - all keywords present", + "description": "Test AND operator with SSN and credit card", + "query": "My SSN and credit card were stolen", + "expected_category": "sensitive_data", + "expected_confidence": 1.0, + "matched_keywords": ["SSN", "credit card"] + }, + { + "name": "AND operator - partial match fails (only SSN)", + "description": "Test AND operator with only one keyword", + "query": "My SSN was stolen", + "expected_category": "", + "expected_confidence": 0.0, + "matched_keywords": [] + }, + { + "name": "AND operator - 
partial match fails (only credit card)", + "description": "Test AND operator with only credit card keyword", + "query": "My credit card was stolen", + "expected_category": "", + "expected_confidence": 0.0, + "matched_keywords": [] + }, + { + "name": "AND operator - keywords in different order", + "description": "Test AND operator with reversed keyword order", + "query": "My credit card and SSN were compromised", + "expected_category": "sensitive_data", + "expected_confidence": 1.0, + "matched_keywords": ["SSN", "credit card"] + }, + { + "name": "Case insensitive - uppercase URGENT", + "description": "Test case insensitive matching with uppercase", + "query": "This is URGENT", + "expected_category": "urgent_request", + "expected_confidence": 1.0, + "matched_keywords": ["urgent"] + }, + { + "name": "Case insensitive - mixed case Urgent", + "description": "Test case insensitive matching with mixed case", + "query": "This is Urgent", + "expected_category": "urgent_request", + "expected_confidence": 1.0, + "matched_keywords": ["urgent"] + }, + { + "name": "Case insensitive - mixed case UrGeNt", + "description": "Test case insensitive matching with random case", + "query": "This is UrGeNt", + "expected_category": "urgent_request", + "expected_confidence": 1.0, + "matched_keywords": ["urgent"] + }, + { + "name": "Case sensitive - exact match SECRET", + "description": "Test case sensitive matching with exact case", + "query": "This is SECRET information", + "expected_category": "case_sensitive_test", + "expected_confidence": 1.0, + "matched_keywords": ["SECRET"] + }, + { + "name": "Case sensitive - lowercase fails", + "description": "Test lowercase input misses the case-sensitive rule and falls through to secret_detection", + "query": "This is secret information", + "expected_category": "secret_detection", + "expected_confidence": 1.0, + "matched_keywords": ["secret"] + }, + { + "name": "Word boundary - secret in sentence", + "description": "Test word boundary with whole word match", + "query": "This is a secret 
meeting", + "expected_category": "secret_detection", + "expected_confidence": 1.0, + "matched_keywords": ["secret"] + }, + { + "name": "Word boundary - secretary no match", + "description": "Test word boundary preventing partial word match", + "query": "Talk to my secretary about this", + "expected_category": "", + "expected_confidence": 0.0, + "matched_keywords": [] + }, + { + "name": "Word boundary - secretariat no match", + "description": "Test word boundary with secretariat", + "query": "The secretariat handles this", + "expected_category": "", + "expected_confidence": 0.0, + "matched_keywords": [] + }, + { + "name": "Regex special char - version 1.0", + "description": "Test literal dot matching in version number", + "query": "We are using version 1.0 now", + "expected_category": "version_check", + "expected_confidence": 1.0, + "matched_keywords": ["1.0"] + }, + { + "name": "Regex special char - version 2.0", + "description": "Test literal dot matching with version 2.0", + "query": "Upgrade to version 2.0 soon", + "expected_category": "version_check", + "expected_confidence": 1.0, + "matched_keywords": ["2.0"] + }, + { + "name": "Regex special char - asterisk literal", + "description": "Test literal asterisk matching", + "query": "The symbol * represents wildcard", + "expected_category": "wildcard_test", + "expected_confidence": 1.0, + "matched_keywords": ["*"] + }, + { + "name": "Edge case - keyword with punctuation", + "description": "Test keyword matching with surrounding punctuation", + "query": "This is urgent!", + "expected_category": "urgent_request", + "expected_confidence": 1.0, + "matched_keywords": ["urgent"] + }, + { + "name": "Edge case - keyword at beginning", + "description": "Test keyword at start of text", + "query": "Urgent: we need help now", + "expected_category": "urgent_request", + "expected_confidence": 1.0, + "matched_keywords": ["urgent"] + }, + { + "name": "Edge case - keyword at end", + "description": "Test keyword at end of text", + 
"query": "Please help, this is urgent", + "expected_category": "urgent_request", + "expected_confidence": 1.0, + "matched_keywords": ["urgent"] + }, + { + "name": "Edge case - multiple matching keywords", + "description": "Test multiple keywords from same rule matching", + "query": "This is urgent and an emergency", + "expected_category": "urgent_request", + "expected_confidence": 1.0, + "matched_keywords": ["urgent"] + }, + { + "name": "Edge case - keyword with newline", + "description": "Test keyword matching across newlines", + "query": "This is\nurgent\nhelp", + "expected_category": "urgent_request", + "expected_confidence": 1.0, + "matched_keywords": ["urgent"] + }, + { + "name": "Edge case - keyword with Unicode", + "description": "Test keyword matching with surrounding Unicode", + "query": "需要 urgent 帮助", + "expected_category": "urgent_request", + "expected_confidence": 1.0, + "matched_keywords": ["urgent"] + }, + { + "name": "Edge case - keyword with emoji", + "description": "Test keyword matching with surrounding emoji", + "query": "🚨 urgent 🚨", + "expected_category": "urgent_request", + "expected_confidence": 1.0, + "matched_keywords": ["urgent"] + } +]