
Commit e65b738

enable bert lora adapter support
Signed-off-by: Huamin Chen <[email protected]>
1 parent 2723b8c commit e65b738

File tree

13 files changed, +2887 −0 lines


openvino-binding/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
@@ -103,6 +103,8 @@ set(SOURCES
     # Classifiers module
     cpp/src/classifiers/text_classifier.cpp
     cpp/src/classifiers/token_classifier.cpp
+    cpp/src/classifiers/lora_adapter.cpp
+    cpp/src/classifiers/lora_classifier.cpp

     # Embeddings module
     cpp/src/embeddings/embedding_generator.cpp
@@ -123,6 +125,8 @@ set(HEADERS
     # Classifier headers
     cpp/include/classifiers/text_classifier.h
     cpp/include/classifiers/token_classifier.h
+    cpp/include/classifiers/lora_adapter.h
+    cpp/include/classifiers/lora_classifier.h

     # Embedding headers
     cpp/include/embeddings/embedding_generator.h

openvino-binding/README.md

Lines changed: 1 addition & 0 deletions
@@ -7,6 +7,7 @@ High-performance Go bindings for semantic routing using Intel® OpenVINO™ Tool
 - 🚀 **High Performance**: Optimized inference with OpenVINO on Intel hardware
 - 🔍 **Semantic Search**: BERT embeddings and cosine similarity
 - 📊 **Classification**: Text classification with confidence scores
+- 🧩 **LoRA Adapter Support**: Parameter-efficient fine-tuning for BERT and ModernBERT
 - 🏷️ **Token Classification**: Named entity recognition and PII detection
 - 🔄 **Batch Processing**: Efficient batch similarity computation
 - 💻 **Multi-Device**: Support for CPU, GPU, VPU, and other Intel accelerators
openvino-binding/cpp/include/classifiers/lora_adapter.h

Lines changed: 73 additions & 0 deletions

@@ -0,0 +1,73 @@
#pragma once

#include <openvino/openvino.hpp>
#include <vector>
#include <memory>
#include <string>

namespace openvino_sr {
namespace classifiers {

/**
 * @brief LoRA configuration
 */
struct LoRAConfig {
    size_t rank = 16;        // LoRA rank
    double alpha = 32.0;     // LoRA alpha for scaling
    double dropout = 0.1;    // Dropout rate (used during training)
    bool use_bias = false;   // Whether to use bias in LoRA layers

    double get_scaling() const {
        return alpha / static_cast<double>(rank);
    }
};

/**
 * @brief LoRA adapter for parameter-efficient fine-tuning
 *
 * Implements Low-Rank Adaptation by applying:
 *   output = input + LoRA_B(LoRA_A(input)) * scaling
 */
class LoRAAdapter {
public:
    LoRAAdapter() = default;

    /**
     * @brief Load LoRA adapter from OpenVINO IR model
     * @param adapter_model_path Path to LoRA adapter model (.xml file)
     * @param config LoRA configuration
     * @param device Device name ("CPU", "GPU", etc.)
     * @return true if successful
     */
    bool load(
        const std::string& adapter_model_path,
        const LoRAConfig& config,
        const std::string& device
    );

    /**
     * @brief Apply LoRA adapter to input tensor
     * @param input Input tensor (pooled output from BERT/ModernBERT)
     * @return Output tensor after LoRA transformation
     */
    ov::Tensor forward(const ov::Tensor& input);

    /**
     * @brief Check if adapter is loaded
     */
    bool isLoaded() const { return compiled_model_ != nullptr; }

    /**
     * @brief Get LoRA configuration
     */
    const LoRAConfig& getConfig() const { return config_; }

private:
    std::shared_ptr<ov::CompiledModel> compiled_model_;
    LoRAConfig config_;
    ov::InferRequest infer_request_;
};

} // namespace classifiers
} // namespace openvino_sr
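The adapter's forward() executes LoRA_A and LoRA_B as a compiled OpenVINO model, so the actual math is opaque in this header. As a minimal sketch of the update it documents, output = input + LoRA_B(LoRA_A(input)) * scaling with scaling = alpha / rank, here is an illustrative standalone C++ function; lora_update and the dense A/B matrices are hypothetical and not part of this commit.

#include <cstddef>
#include <vector>

// Illustrative only: the real adapter runs the A and B projections as a
// compiled OpenVINO model; this spells out the arithmetic the header describes.
std::vector<float> lora_update(const std::vector<float>& x,                  // input, size d
                               const std::vector<std::vector<float>>& A,     // down-projection, r x d
                               const std::vector<std::vector<float>>& B,     // up-projection, d x r
                               double alpha) {
    const size_t d = x.size();
    const size_t r = A.size();
    const double scaling = alpha / static_cast<double>(r);  // LoRAConfig::get_scaling()

    // h = A * x  (project down to rank r)
    std::vector<float> h(r, 0.0f);
    for (size_t i = 0; i < r; ++i)
        for (size_t j = 0; j < d; ++j)
            h[i] += A[i][j] * x[j];

    // y = x + (B * h) * scaling  (project back up and add the residual)
    std::vector<float> y(x);
    for (size_t i = 0; i < d; ++i) {
        float acc = 0.0f;
        for (size_t k = 0; k < r; ++k)
            acc += B[i][k] * h[k];
        y[i] += static_cast<float>(scaling) * acc;
    }
    return y;
}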
openvino-binding/cpp/include/classifiers/lora_classifier.h

Lines changed: 177 additions & 0 deletions

@@ -0,0 +1,177 @@
#pragma once

#include "../core/types.h"
#include "../core/tokenizer.h"
#include "lora_adapter.h"
#include <string>
#include <memory>
#include <mutex>
#include <unordered_map>

namespace openvino_sr {
namespace classifiers {

/**
 * @brief Task types for LoRA multi-task classification
 */
enum class TaskType {
    Intent,
    PII,
    Security,
    Classification
};

/**
 * @brief Token-level prediction for token classification models
 */
struct TokenPrediction {
    std::string token;   // The token text
    int class_id;        // Predicted class ID
    float confidence;    // Confidence score (0.0 to 1.0)
};

/**
 * @brief Detected entity from BIO tagging
 */
struct DetectedEntity {
    std::string type;    // Entity type (e.g., "EMAIL_ADDRESS", "PERSON")
    std::string text;    // The detected entity text
    int start_token;     // Start token index
    int end_token;       // End token index (inclusive)
    float confidence;    // Average confidence of tokens in entity
};

/**
 * @brief Token classification result
 */
struct TokenClassificationResult {
    std::vector<TokenPrediction> token_predictions;  // Per-token predictions
    std::vector<DetectedEntity> entities;            // Detected entities (aggregated from BIO tags)
    float processing_time_ms;                        // Processing time in milliseconds
};

/**
 * @brief LoRA-enabled classifier for BERT and ModernBERT
 *
 * Supports multi-task classification with parameter-efficient LoRA adapters.
 * Each task has its own LoRA adapter and classification head.
 */
class LoRAClassifier {
public:
    LoRAClassifier() = default;

    /**
     * @brief Initialize LoRA classifier with base model and adapters
     * @param base_model_path Path to base BERT/ModernBERT model (.xml file)
     * @param lora_adapters_path Path to directory containing LoRA adapter models
     * @param task_configs Map of task types to number of classes
     * @param device Device name ("CPU", "GPU", etc.)
     * @param model_type "bert" or "modernbert"
     * @return true if successful
     */
    bool initialize(
        const std::string& base_model_path,
        const std::string& lora_adapters_path,
        const std::unordered_map<TaskType, int>& task_configs,
        const std::string& device = "CPU",
        const std::string& model_type = "bert"
    );

    /**
     * @brief Classify text for a specific task (sequence classification)
     * @param text Input text
     * @param task Task type
     * @return Classification result
     */
    core::ClassificationResult classifyTask(const std::string& text, TaskType task);

    /**
     * @brief Classify tokens for token-level classification (e.g., NER, PII detection)
     * @param text Input text
     * @param task Task type (should be PII or similar token classification task)
     * @return Token classification result with per-token predictions and detected entities
     */
    TokenClassificationResult classifyTokens(const std::string& text, TaskType task);

    /**
     * @brief Check if initialized
     */
    bool isInitialized() const {
        return base_model_ && base_model_->compiled_model != nullptr;
    }

    /**
     * @brief Get supported tasks
     */
    std::vector<TaskType> getSupportedTasks() const;

private:
    /**
     * @brief Get pooled output from base model
     */
    ov::Tensor getPooledOutput(const std::string& text);

    /**
     * @brief Apply task-specific LoRA adapter and classification head
     */
    core::ClassificationResult applyLoRAAndClassify(
        const ov::Tensor& pooled_output,
        TaskType task
    );

    /**
     * @brief Load task-specific LoRA adapter and classification head
     */
    bool loadTaskAdapter(
        const std::string& lora_adapters_path,
        TaskType task,
        int num_classes,
        const std::string& device
    );

    /**
     * @brief Get task name as string
     */
    std::string getTaskName(TaskType task) const;

    /**
     * @brief Get maximum sequence length for the model type
     * @return Max sequence length (8192 for ModernBERT, 512 for BERT)
     */
    int getMaxSequenceLength() const;

    /**
     * @brief Aggregate BIO tags into detected entities
     * @param original_text The original input text
     * @param tokens Vector of token strings
     * @param predictions Vector of token predictions
     * @param labels Map of class IDs to label names
     * @return Vector of detected entities
     */
    std::vector<DetectedEntity> aggregateBIOTags(
        const std::string& original_text,
        const std::vector<std::string>& tokens,
        const std::vector<TokenPrediction>& predictions,
        const std::unordered_map<int, std::string>& labels
    ) const;

    /**
     * @brief Load label mapping from JSON file
     * @param adapters_path Path to adapters directory containing label_mapping.json
     * @return Map of class IDs to label names
     */
    std::unordered_map<int, std::string> loadLabelMapping(const std::string& adapters_path) const;

    std::shared_ptr<core::ModelInstance> base_model_;                              // Frozen base model
    std::unordered_map<TaskType, LoRAAdapter> lora_adapters_;                      // Task-specific LoRA adapters
    std::unordered_map<TaskType, std::shared_ptr<ov::CompiledModel>> task_heads_;  // Classification heads
    std::unordered_map<TaskType, int> task_num_classes_;                           // Number of classes per task
    std::string adapters_path_;                                                    // Path to adapters directory
    core::OVNativeTokenizer tokenizer_;
    std::mutex mutex_;
    std::string model_type_;                                                       // "bert" or "modernbert"
};

} // namespace classifiers
} // namespace openvino_sr
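A hedged usage sketch of the classifier declared above: the model and adapter paths and the per-task class counts are made up for illustration, and the fields of core::ClassificationResult are declared in core/types.h rather than in this commit, so the sequence-level result is not inspected here.

#include "classifiers/lora_classifier.h"  // include path assumed from the repo layout
#include <iostream>
#include <unordered_map>

int main() {
    using namespace openvino_sr::classifiers;

    LoRAClassifier clf;
    // Hypothetical class counts per task; real values depend on the trained adapters.
    std::unordered_map<TaskType, int> tasks = {
        {TaskType::Intent, 7},
        {TaskType::PII, 17},
    };
    if (!clf.initialize("models/bert-base.xml", "models/lora_adapters",
                        tasks, "CPU", "bert")) {
        std::cerr << "failed to initialize LoRA classifier\n";
        return 1;
    }

    // Sequence-level classification for one task.
    auto intent = clf.classifyTask("book a flight to Boston", TaskType::Intent);
    (void)intent;  // members of core::ClassificationResult are defined in core/types.h

    // Token-level classification (e.g., PII detection) with BIO entity aggregation.
    TokenClassificationResult pii =
        clf.classifyTokens("Contact me at jane@example.com", TaskType::PII);
    for (const auto& e : pii.entities) {
        std::cout << e.type << ": " << e.text
                  << " (confidence " << e.confidence << ")\n";
    }
    return 0;
}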

openvino-binding/cpp/include/openvino_semantic_router.h

Lines changed: 85 additions & 0 deletions
@@ -356,6 +356,91 @@ OVTokenClassificationResult ov_classify_modernbert_tokens(const char* text, cons
  */
 OVEmbeddingResult ov_get_modernbert_embedding(const char* text, int max_length);

+// ================================================================================================
+// LORA ADAPTER SUPPORT (BERT AND MODERNBERT)
+// ================================================================================================
+
+/**
+ * @brief Task type enumeration for LoRA multi-task classification
+ */
+typedef enum {
+    OV_TASK_INTENT = 0,
+    OV_TASK_PII = 1,
+    OV_TASK_SECURITY = 2,
+    OV_TASK_CLASSIFICATION = 3
+} OVTaskType;
+
+/**
+ * @brief Initialize BERT LoRA classifier
+ * @param base_model_path Path to base BERT model (.xml file)
+ * @param lora_adapters_path Path to directory containing LoRA adapter models
+ * @param device Device name ("CPU", "GPU", etc.)
+ * @return true if initialization succeeded, false otherwise
+ */
+bool ov_init_bert_lora_classifier(
+    const char* base_model_path,
+    const char* lora_adapters_path,
+    const char* device
+);
+
+/**
+ * @brief Check if BERT LoRA classifier is initialized
+ * @return true if initialized, false otherwise
+ */
+bool ov_is_bert_lora_classifier_initialized();
+
+/**
+ * @brief Initialize ModernBERT LoRA classifier
+ * @param base_model_path Path to base ModernBERT model (.xml file)
+ * @param lora_adapters_path Path to directory containing LoRA adapter models
+ * @param device Device name ("CPU", "GPU", etc.)
+ * @return true if initialization succeeded, false otherwise
+ */
+bool ov_init_modernbert_lora_classifier(
+    const char* base_model_path,
+    const char* lora_adapters_path,
+    const char* device
+);
+
+/**
+ * @brief Check if ModernBERT LoRA classifier is initialized
+ * @return true if initialized, false otherwise
+ */
+bool ov_is_modernbert_lora_classifier_initialized();
+
+/**
+ * @brief Classify text using BERT LoRA adapter for a specific task
+ * @param text Input text
+ * @param task Task type
+ * @return Classification result
+ */
+OVClassificationResult ov_classify_bert_lora_task(const char* text, OVTaskType task);
+
+/**
+ * @brief Classify text using ModernBERT LoRA adapter for a specific task
+ * @param text Input text
+ * @param task Task type
+ * @return Classification result
+ */
+OVClassificationResult ov_classify_modernbert_lora_task(const char* text, OVTaskType task);
+
+/**
+ * @brief Token classification using BERT LoRA (for PII detection, NER, etc.)
+ * @param text Input text
+ * @param task Task type (should be PII or similar token classification task)
+ * @return Token classification result (caller must free using ov_free_token_classification_result)
+ */
+OVTokenClassificationResult ov_classify_bert_lora_tokens(const char* text, OVTaskType task);
+
+/**
+ * @brief Token classification using ModernBERT LoRA (for PII detection, NER, etc.)
+ * @param text Input text
+ * @param task Task type (should be PII or similar token classification task)
+ * @return Token classification result (caller must free using ov_free_token_classification_result)
+ */
+OVTokenClassificationResult ov_classify_modernbert_lora_tokens(const char* text, OVTaskType task);
+
 // ================================================================================================
 // UTILITY FUNCTIONS
 // ================================================================================================
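From the Go-binding side, the new C entry points would be exercised roughly as follows. Paths are placeholders, the members of OVClassificationResult and OVTokenClassificationResult are declared earlier in the header (not in this hunk) and are left untouched, and the by-value call to ov_free_token_classification_result is an assumption about a signature this hunk only references in documentation.

#include "openvino_semantic_router.h"
#include <stdio.h>

int use_bert_lora(void) {
    // Initialize the BERT LoRA classifier with hypothetical model/adapter paths.
    if (!ov_init_bert_lora_classifier("models/bert-base.xml",
                                      "models/lora_adapters", "CPU")) {
        fprintf(stderr, "LoRA classifier init failed\n");
        return 1;
    }
    if (!ov_is_bert_lora_classifier_initialized()) {
        return 1;
    }

    /* Sequence classification for the intent task; result fields are declared
       earlier in the header, so they are not inspected in this sketch. */
    OVClassificationResult intent =
        ov_classify_bert_lora_task("book a flight to Boston", OV_TASK_INTENT);
    (void)intent;

    /* Token classification for PII; the header says the result must be released
       with ov_free_token_classification_result (by-value signature assumed here). */
    OVTokenClassificationResult pii =
        ov_classify_bert_lora_tokens("my card number is 4111 1111 1111 1111",
                                     OV_TASK_PII);
    ov_free_token_classification_result(pii);
    return 0;
}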
