elastic · davidkyle · Sep 26, 2025 · Sep 17, 2025 · Sep 17, 2025 · Sep 17, 2025
diff --git a/PR_DESCRIPTION.md b/PR_DESCRIPTION.md
@@ -0,0 +1,88 @@
+# Add ContextualAI Rerank Service Implementation
+
+## Overview
+This PR adds a complete implementation of the ContextualAI rerank service integration for Elasticsearch's inference plugin, following the established patterns used by other inference services (OpenAI, Cohere, etc.).
+
+## What's Implemented
+- **Complete ContextualAI Service**: Full service implementation with rerank task support
+- **Service Registration**: Integrated with InferencePlugin to register the service
+- **Request/Response Handling**: Proper HTTP client integration with ContextualAI API
+- **Configuration Management**: Service settings, task settings, and secret handling
+- **Rate Limiting**: Integrated with Elasticsearch's rate limiting infrastructure
+- **Instruction Support**: Added instruction parameter for guiding rerank behavior
+
+## Files Added/Modified
+- `ContextualAiService.java` - Main service implementation
+- `ContextualAiRerankModel.java` - Model representation
+- `ContextualAiRerankServiceSettings.java` - Service configuration
+- `ContextualAiRerankTaskSettings.java` - Task-specific settings with instruction support
+- `ContextualAiActionCreator.java` - Action creation logic
+- `ContextualAiRerankRequest.java` - HTTP request handling with debug logging
+- `ContextualAiRerankRequestEntity.java` - JSON serialization with proper field ordering
+- `ContextualAiRerankResponseEntity.java` - Response parsing
+- `InferencePlugin.java` - Service registration
+
+## Current Status: BLOCKED - Need Help
+
+### Problem
+The implementation compiles successfully and the service registers correctly, but I'm encountering validation issues during inference execution (POST requests). Specifically:
+
+```
+"error": {
+    "type": "validation_exception", 
+    "reason": "Validation Failed: 1: [service_settings] does not contain the required setting [model_id];"
+}
+```
+
+### Root Cause Analysis
+The issue appears to be in how `parseRequestConfig` is called during POST inference requests: service settings seem to be validated during inference execution, when they should only be validated during endpoint registration (PUT requests).
+
+**What I've tried:**
+1. ✅ Fixed `ContextualAiRerankServiceSettings.fromMap()` to handle REQUEST vs PERSISTENT contexts differently
+2. ✅ Made `model_id` optional for REQUEST context (following Cohere service pattern)
+3. ✅ Updated constructor to handle nullable model_id
+4. ✅ Added proper debug logging to trace the request flow
+5. ❌ Still getting validation errors on POST requests
+
+### What I Need Help With
+
+1. **Architecture Guidance**: 
+   - Should `parseRequestConfig` be called at all for simple inference requests without task setting overrides?
+   - Is there a different code path that should be taken for POST vs PUT requests?
+
+2. **Pattern Clarification**:
+   - How do other services (OpenAI, Cohere) handle the distinction between registration and inference?
+   - Is there a specific interface method I should implement differently?
+
+3. **Debugging Assistance**:
+   - Where in the inference pipeline should I set breakpoints to trace why `parseRequestConfig` is being called?
+   - Are there specific validation steps I'm missing?
+
+### Expected Behavior
+**Registration (PUT)** should work:
+```json
+PUT /_inference/rerank/contextualai-reranker
+{
+  "service": "contextualai",
+  "service_settings": { "model_id": "ctxl-rerank-v2-instruct-multilingual" },
+  "secrets": { "api_key": "..." }
+}
+```
+
+**Inference (POST)** should work:
+```json
+POST /_inference/rerank/contextualai-reranker  
+{
+  "query": "search query",
+  "input": ["doc1", "doc2", "doc3"]
+}
+```
+
+### Additional Context
+- All ContextualAI files follow established patterns from other services
+- Service compiles and registers successfully
+- Debug logging shows proper JSON formatting for ContextualAI API
+- Authentication headers are properly configured
+- Rate limiting infrastructure is in place
+
+**Help needed**: Guidance on the proper inference request handling pattern and why validation is failing during POST requests.
diff --git a/debug_elasticsearch.sh b/debug_elasticsearch.sh
@@ -0,0 +1,85 @@
+#!/bin/bash
+
+# Debug Elasticsearch with ContextualAI Service
+# Builds a local distribution and starts it (HTTP on 9200) with a JDWP debugger listening on port 5005
+
+set -e
+
+echo "Starting Elasticsearch in debug mode..."
+
+# Parse arguments: -f, --f or --full triggers a full rebuild (clean + assemble)
+FULL_REBUILD=false
+for arg in "$@"; do
+    case "$arg" in
+        -f|--f|--full)
+            FULL_REBUILD=true
+            ;;
+    esac
+done
+
+if [ "$FULL_REBUILD" = true ]; then
+    echo "Performing full rebuild: clean + assemble"
+    ./gradlew :x-pack:plugin:inference:clean :x-pack:plugin:inference:processResources :x-pack:plugin:inference:compileJava :distribution:archives:darwin-tar:assemble --no-parallel
+else
+    echo "Building Elasticsearch distribution for debugging (assemble only)..."
+    ./gradlew :distribution:archives:darwin-tar:assemble --parallel
+fi
+
+# Extract the distribution into the local debug folder (overwrite existing)
+ES_DIST="elasticsearch-${ES_DEBUG_VERSION:-9.2.0-SNAPSHOT}"  # set ES_DEBUG_VERSION if the checkout builds another version
+mkdir -p build/distribution/local
+cd build/distribution/local
+rm -rf "$ES_DIST"
+tar -xzf "../../../distribution/archives/darwin-tar/build/distributions/${ES_DIST}-darwin-x86_64.tar.gz"
+cd ../../..
+
+# Kill running Elasticsearch JVMs only: match the main classes, not every command line that mentions "elasticsearch"
+echo "Killing any existing Elasticsearch processes..."
+pkill -f 'org\.elasticsearch\.(bootstrap\.Elasticsearch|launcher\.CliToolLauncher)' || true
+sleep 2
+
+# Create debug config - disable security for easier debugging
+cd "build/distribution/local/$ES_DIST"
+
+# Clear all persistent data (cluster state, inference endpoints, indices, etc.)
+rm -rf data/
+echo "Cleared cluster state and data for fresh start"
+
+cat > config/elasticsearch.yml << EOF
+# Debug configuration - NO SECURITY for easier debugging
+xpack.security.enabled: false
+xpack.security.http.ssl.enabled: false
+xpack.security.transport.ssl.enabled: false
+network.host: localhost
+http.port: 9200
+cluster.name: elasticsearch-debug
+node.name: debug-node
+discovery.type: single-node
+xpack.ml.enabled: false
+EOF
+
+echo "Starting Elasticsearch with remote debugging enabled (NO SECURITY)..."
+echo "Debug port: 5005"
+echo "Connect your debugger to localhost:5005"
+
+# Set JVM debug options via ES_JAVA_OPTS (-agentlib:jdwp replaces the legacy -Xdebug -Xrunjdwp spelling)
+export ES_JAVA_OPTS="-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005"
+
+# Start with configuration options - NO SECURITY for debugging
+echo "Starting Elasticsearch in foreground with debug logging..."
+echo "Press Ctrl+C to stop"
+echo ""
+./bin/elasticsearch \
+    -E cluster.name=elasticsearch-debug \
+    -E node.name=debug-node \
+    -E discovery.type=single-node \
+    -E xpack.security.enabled=false \
+    -E network.host=localhost \
+    -E http.port=9200 \
+    -E xpack.ml.enabled=false \
+    -E logger.org.elasticsearch.xpack.inference=DEBUG \
+    -E logger.org.elasticsearch.xpack.inference.services.contextualai=TRACE \
+    -E logger.org.elasticsearch.xpack.inference.external=DEBUG \
+    -E logger.org.elasticsearch.xpack.inference.services=DEBUG \
+    -E logger.org.elasticsearch.action.admin.cluster.settings=DEBUG \
+    -E logger.org.elasticsearch.xpack.inference.services.validation=DEBUG
diff --git a/docs/changelog/134933.yaml b/docs/changelog/134933.yaml
@@ -0,0 +1,5 @@
+pr: 134933 
+summary: Add ContextualAI rerank service to the Inference API
+area: Machine Learning
+type: enhancement
+issues: []
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java
@@ -127,6 +127,7 @@
 import org.elasticsearch.xpack.inference.services.azureaistudio.AzureAiStudioService;
 import org.elasticsearch.xpack.inference.services.azureopenai.AzureOpenAiService;
 import org.elasticsearch.xpack.inference.services.cohere.CohereService;
+import org.elasticsearch.xpack.inference.services.contextualai.ContextualAiService;
 import org.elasticsearch.xpack.inference.services.custom.CustomService;
 import org.elasticsearch.xpack.inference.services.deepseek.DeepSeekService;
 import org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceService;
@@ -409,6 +410,7 @@ public List<InferenceServiceExtension.Factory> getInferenceServiceFactories() {
             context -> new HuggingFaceService(httpFactory.get(), serviceComponents.get(), context),
             context -> new OpenAiService(httpFactory.get(), serviceComponents.get(), context),
             context -> new CohereService(httpFactory.get(), serviceComponents.get(), context),
+            context -> new ContextualAiService(httpFactory.get(), serviceComponents.get(), context),
             context -> new AzureOpenAiService(httpFactory.get(), serviceComponents.get(), context),
             context -> new AzureAiStudioService(httpFactory.get(), serviceComponents.get(), context),
             context -> new GoogleAiStudioService(httpFactory.get(), serviceComponents.get(), context),

diff --git a/.../main/java/org/elasticsearch/xpack/inference/services/contextualai/ContextualAiModel.java b/.../main/java/org/elasticsearch/xpack/inference/services/contextualai/ContextualAiModel.java
@@ -0,0 +1,79 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.contextualai;
+
+import org.elasticsearch.common.settings.SecureString;
+import org.elasticsearch.core.Nullable;
+import org.elasticsearch.inference.ModelConfigurations;
+import org.elasticsearch.inference.ModelSecrets;
+import org.elasticsearch.inference.ServiceSettings;
+import org.elasticsearch.inference.TaskSettings;
+import org.elasticsearch.xpack.inference.external.action.ExecutableAction;
+import org.elasticsearch.xpack.inference.services.RateLimitGroupingModel;
+import org.elasticsearch.xpack.inference.services.ServiceUtils;
+import org.elasticsearch.xpack.inference.services.contextualai.action.ContextualAiActionVisitor;
+import org.elasticsearch.xpack.inference.services.settings.ApiKeySecrets;
+import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings;
+
+import java.net.URI;
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * Common base for ContextualAI models. It keeps the API key and the
+ * {@link ContextualAiRateLimitServiceSettings} and exposes the rate-limit
+ * settings, rate-limit grouping hash and base URI derived from them.
+ */
+public abstract class ContextualAiModel extends RateLimitGroupingModel {
+
+    private final ContextualAiRateLimitServiceSettings rateLimitServiceSettings;
+    private final SecureString apiKey;
+
+    /**
+     * Builds a model from its configuration and secrets.
+     *
+     * @param configurations           forwarded to the superclass constructor
+     * @param secrets                  forwarded to the superclass constructor
+     * @param apiKeySecrets            may be {@code null}; turned into the API key by {@link ServiceUtils#apiKey}
+     * @param rateLimitServiceSettings must not be {@code null}
+     * @throws NullPointerException if {@code rateLimitServiceSettings} is {@code null}
+     */
+    public ContextualAiModel(
+        ModelConfigurations configurations,
+        ModelSecrets secrets,
+        @Nullable ApiKeySecrets apiKeySecrets,
+        ContextualAiRateLimitServiceSettings rateLimitServiceSettings
+    ) {
+        super(configurations, secrets);
+
+        // The null check runs before the key is resolved, as in the original ordering.
+        this.rateLimitServiceSettings = Objects.requireNonNull(rateLimitServiceSettings);
+        this.apiKey = ServiceUtils.apiKey(apiKeySecrets);
+    }
+
+    /**
+     * Copies {@code model}, replacing its task settings. The rate-limit service
+     * settings and API key are taken from {@code model} through its accessors.
+     */
+    protected ContextualAiModel(ContextualAiModel model, TaskSettings taskSettings) {
+        super(model, taskSettings);
+
+        this.rateLimitServiceSettings = model.rateLimitServiceSettings();
+        this.apiKey = model.apiKey();
+    }
+
+    /**
+     * Copies {@code model}, replacing its service settings. The rate-limit service
+     * settings and API key are taken from {@code model} through its accessors.
+     */
+    protected ContextualAiModel(ContextualAiModel model, ServiceSettings serviceSettings) {
+        super(model, serviceSettings);
+
+        this.rateLimitServiceSettings = model.rateLimitServiceSettings();
+        this.apiKey = model.apiKey();
+    }
+
+    /**
+     * Creates the executable action for this model.
+     *
+     * @param creator      visitor used to build the action
+     * @param taskSettings task settings supplied as a map
+     * @return the action produced by the concrete model type
+     */
+    public abstract ExecutableAction accept(ContextualAiActionVisitor creator, Map<String, Object> taskSettings);
+
+    /** @return the API key held by this model */
+    public SecureString apiKey() {
+        return this.apiKey;
+    }
+
+    /** @return the rate-limit related service settings held by this model */
+    public ContextualAiRateLimitServiceSettings rateLimitServiceSettings() {
+        return this.rateLimitServiceSettings;
+    }
+
+    /** @return the rate limit settings reported by the rate-limit service settings */
+    public RateLimitSettings rateLimitSettings() {
+        return this.rateLimitServiceSettings.rateLimitSettings();
+    }
+
+    /** @return the hash code of the API key, read through {@link #apiKey()} */
+    public int rateLimitGroupingHash() {
+        final SecureString key = apiKey();
+        return key.hashCode();
+    }
+
+    /** @return the URI reported by the rate-limit service settings */
+    public URI baseUri() {
+        return this.rateLimitServiceSettings.uri();
+    }
+}
diff --git a/...ticsearch/xpack/inference/services/contextualai/ContextualAiRateLimitServiceSettings.java b/...ticsearch/xpack/inference/services/contextualai/ContextualAiRateLimitServiceSettings.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.contextualai;
+
+import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings;
+
+import java.net.URI;
+
+/**
+ * Service-settings view that exposes a model id, a URI and rate limit settings
+ * for ContextualAI.
+ */
+public interface ContextualAiRateLimitServiceSettings {
+    /** @return the model id */
+    String modelId();
+
+    /** @return the URI */
+    URI uri();
+
+    /** @return the rate limit settings */
+    RateLimitSettings rateLimitSettings();
+}
diff --git a/.../org/elasticsearch/xpack/inference/services/contextualai/ContextualAiResponseHandler.java b/.../org/elasticsearch/xpack/inference/services/contextualai/ContextualAiResponseHandler.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.contextualai;
+
+import org.elasticsearch.xpack.inference.external.http.HttpResult;
+import org.elasticsearch.xpack.inference.external.http.retry.BaseResponseHandler;
+import org.elasticsearch.xpack.inference.external.http.retry.ResponseParser;
+import org.elasticsearch.xpack.inference.external.http.retry.RetryException;
+import org.elasticsearch.xpack.inference.external.request.Request;
+import org.elasticsearch.xpack.inference.services.contextualai.response.ContextualAiErrorResponseEntity;
+
+/**
+ * Response handler for ContextualAI API calls.
+ *
+ * Unsuccessful responses are converted into a {@link RetryException} whose
+ * error is chosen from the HTTP status code.
+ */
+public class ContextualAiResponseHandler extends BaseResponseHandler {
+
+    /**
+     * @param requestType       forwarded to {@link BaseResponseHandler}
+     * @param parseFunction     parser for successful responses, forwarded to {@link BaseResponseHandler}
+     * @param supportsStreaming forwarded to {@link BaseResponseHandler}
+     */
+    public ContextualAiResponseHandler(String requestType, ResponseParser parseFunction, boolean supportsStreaming) {
+        super(requestType, parseFunction, ContextualAiErrorResponseEntity::fromResponse, supportsStreaming);
+    }
+
+    /**
+     * Throws for every response that is not successful.
+     *
+     * The boolean passed to {@link RetryException} is {@code true} for 500 and 429
+     * and {@code false} for every other status (presumably the should-retry flag;
+     * confirm against {@code RetryException}).
+     *
+     * @param request the request that produced {@code result}
+     * @param result  the HTTP result to inspect
+     * @throws RetryException whenever {@code result} is not a successful response
+     */
+    @Override
+    protected void checkForFailureStatusCode(Request request, HttpResult result) throws RetryException {
+        if (result.isSuccessfulResponse()) {
+            return;
+        }
+
+        final int code = result.response().getStatusLine().getStatusCode();
+
+        // Each guard throws, so the checks below are evaluated in order and are mutually exclusive.
+        if (code == 500) {
+            throw new RetryException(true, buildError(SERVER_ERROR, request, result));
+        }
+        if (code > 500) {
+            throw new RetryException(false, buildError(SERVER_ERROR, request, result));
+        }
+        if (code == 429) {
+            throw new RetryException(true, buildError(RATE_LIMIT, request, result));
+        }
+        if (code == 401) {
+            throw new RetryException(false, buildError(AUTHENTICATION, request, result));
+        }
+        if (code >= 300 && code < 400) {
+            throw new RetryException(false, buildError(REDIRECTION, request, result));
+        }
+
+        // Anything left is reported as a generic unsuccessful call.
+        throw new RetryException(false, buildError(UNSUCCESSFUL, request, result));
+    }
+}