Skip to content

Commit 01c4e12

Browse files
committed
feat: add the trainer for HF LLMs
1 parent fab113b commit 01c4e12

File tree

14 files changed

+1120
-26
lines changed

14 files changed

+1120
-26
lines changed

app/api/api.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import os.path
55
import app.api.globals as cms_globals
66

7-
from typing import Dict, Any, Optional
7+
from typing import Dict, Any, Optional, Union, Type
88
from concurrent.futures import ThreadPoolExecutor
99
from anyio.lowlevel import RunVar
1010
from anyio import CapacityLimiter
@@ -20,7 +20,7 @@
2020
from app.api.dependencies import ModelServiceDep
2121
from app.api.utils import add_exception_handlers, add_rate_limiter, init_vllm_engine
2222
from app.config import Settings
23-
from app.domain import Tags, TagsStreamable
23+
from app.domain import Tags, TagsStreamable, TagsGenerative
2424
from app.management.tracker_client import TrackerClient
2525
from app.utils import get_settings, unpack_model_data_package, get_model_data_package_base_name
2626
from app.exception import ConfigurationException
@@ -131,6 +131,11 @@ def get_generative_server(config: Settings, msd_overwritten: Optional[ModelServi
131131
app = _load_health_check_router(app)
132132
logger.debug("Health check router loaded")
133133

134+
if config.ENABLE_TRAINING_APIS == "true":
135+
app = _load_supervised_training_router(app)
136+
logger.debug("Supervised training router loaded")
137+
app = _load_training_operations(app)
138+
134139
if config.AUTH_USER_ENABLED == "true":
135140
app = _load_auth_router(app)
136141
logger.debug("Auth router loaded")
@@ -198,11 +203,18 @@ def _get_app(
198203
streamable: bool = False,
199204
generative: bool = False,
200205
) -> FastAPI:
201-
tags_metadata = [{ # type: ignore
202-
"name": tag.name,
203-
"description": tag.value
204-
} for tag in (Tags if not streamable else TagsStreamable)]
205206
config = get_settings()
207+
tags: Union[Type[Tags], Type[TagsStreamable], Type[TagsGenerative]]
208+
if generative:
209+
tags = TagsGenerative
210+
elif streamable:
211+
tags = TagsStreamable
212+
else:
213+
tags = Tags
214+
tags_metadata = [{ # type: ignore
215+
"name": tag.name, # type: ignore
216+
"description": tag.value # type: ignore
217+
} for tag in tags]
206218
app = FastAPI(
207219
title="CogStack ModelServe",
208220
summary="A model serving and governance system for CogStack NLP solutions",

app/api/routers/generative.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from starlette.status import HTTP_200_OK, HTTP_400_BAD_REQUEST, HTTP_500_INTERNAL_SERVER_ERROR
1414
from app.domain import (
1515
Tags,
16+
TagsGenerative,
1617
OpenAIChatRequest,
1718
OpenAIChatResponse,
1819
OpenAIEmbeddingsRequest,
@@ -41,7 +42,7 @@
4142

4243
@router.post(
4344
PATH_GENERATE,
44-
tags=[Tags.Generative.name],
45+
tags=[TagsGenerative.Generative],
4546
response_class=PlainTextResponse,
4647
dependencies=[Depends(cms_globals.props.current_active_user)],
4748
description="Generate text",
@@ -91,7 +92,7 @@ def generate_text(
9192

9293
@router.post(
9394
PATH_GENERATE_ASYNC,
94-
tags=[Tags.Generative.name],
95+
tags=[TagsGenerative.Generative],
9596
response_class=StreamingResponse,
9697
dependencies=[Depends(cms_globals.props.current_active_user)],
9798
description="Generate a stream of texts",

app/api/routers/supervised_training.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@
1212

1313
import app.api.globals as cms_globals
1414
from app.api.dependencies import validate_tracking_id
15-
from app.domain import Tags
15+
from app.domain import Tags, ModelType
1616
from app.model_services.base import AbstractModelService
17-
from app.processors.metrics_collector import concat_trainer_exports
17+
from app.processors.metrics_collector import concat_json_lists, concat_trainer_exports
1818
from app.utils import filter_by_concept_ids
1919

2020
router = APIRouter()
@@ -72,12 +72,19 @@ async def train_supervised(
7272
files.append(temp_te)
7373
file_names.append("" if te.filename is None else te.filename)
7474

75-
concatenated = concat_trainer_exports([file.name for file in files], allow_recurring_doc_ids=False)
76-
logger.debug("Training exports concatenated")
77-
data_file = tempfile.NamedTemporaryFile(mode="w")
78-
concatenated = filter_by_concept_ids(cast(Dict[str, Any], concatenated), model_service.info().model_type)
79-
logger.debug("Training exports filtered by concept IDs")
80-
json.dump(concatenated, data_file)
75+
if model_service.info().model_type is not ModelType.HUGGINGFACE_LLM:
76+
concatenated_te = concat_trainer_exports([file.name for file in files], allow_recurring_doc_ids=False)
77+
logger.debug("Training exports concatenated")
78+
data_file = tempfile.NamedTemporaryFile(mode="w+")
79+
concatenated_te = filter_by_concept_ids(cast(Dict[str, Any], concatenated_te), model_service.info().model_type)
80+
logger.debug("Training exports filtered by concept IDs")
81+
json.dump(concatenated_te, data_file)
82+
else:
83+
concatenated = concat_json_lists([file.name for file in files])
84+
logger.debug("Training exports concatenated")
85+
data_file = tempfile.NamedTemporaryFile(mode="w+")
86+
json.dump(concatenated, data_file)
87+
8188
data_file.flush()
8289
data_file.seek(0)
8390
training_id = tracking_id or str(uuid.uuid4())
@@ -102,6 +109,7 @@ async def train_supervised(
102109
return _get_training_response(training_response, training_id)
103110

104111

112+
105113
def _get_training_response(training_response: Tuple[bool, str, str], training_id: str) -> JSONResponse:
106114
training_accepted, experiment_id, run_id = training_response
107115
if training_accepted:

app/api/utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
from slowapi.errors import RateLimitExceeded
2727
from fastapi_users.jwt import decode_jwt
2828
from app.config import Settings
29-
from app.domain import Tags
29+
from app.domain import TagsGenerative
3030
from app.exception import StartTrainingException, AnnotationException, ConfigurationException, ClientException
3131

3232
logger = logging.getLogger("cms")
@@ -376,7 +376,7 @@ async def _stream() -> AsyncGenerator[bytes, None]:
376376
endpoint=endpoint,
377377
methods=methods,
378378
include_in_schema=True,
379-
tags=[Tags.Generative],
379+
tags=[TagsGenerative.Generative.name],
380380
)
381381
app.include_router(router)
382382

app/domain.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,15 @@ class Tags(str, Enum):
3131

3232

3333
class TagsStreamable(str, Enum):
34+
Metadata = "Get the model card"
3435
Streaming = "Retrieve NER entities as a stream by running the model"
3536

3637

38+
class TagsGenerative(str, Enum):
39+
Metadata = "Get the model card"
40+
Generative = "Generate text based on the input prompt"
41+
42+
3743
class CodeType(str, Enum):
3844
SNOMED = "SNOMED"
3945
UMLS = "UMLS"
@@ -104,6 +110,19 @@ class LlmEngine(Enum):
104110
CMS = "CMS"
105111
VLLM = "vLLM"
106112

113+
class LlmRole(Enum):
114+
SYSTEM = "system"
115+
USER = "user"
116+
ASSISTANT = "assistant"
117+
TOOL = "tool"
118+
119+
class LlmTrainerType(Enum):
120+
GRPO = "grpo"
121+
PPO = "ppo"
122+
123+
class LlmDatasetType(Enum):
124+
JSON = "json"
125+
CSV = "csv"
107126

108127
class Annotation(BaseModel):
109128
doc_name: Optional[str] = Field(default=None, description="The name of the document to which the annotation belongs")

app/exception.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,4 +27,8 @@ class ClientException(Exception):
2727

2828

2929
class DatasetException(Exception):
30-
""" An exception raised due to dataset errors"""
30+
"""An exception raised due to dataset errors"""
31+
32+
33+
class DeviceNotAvailableError(RuntimeError):
34+
"""An exception raised when a specific device is required but not available."""

app/model_services/huggingface_llm_model.py

Lines changed: 67 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,19 @@
33
import asyncio
44
import torch
55
from concurrent.futures import ThreadPoolExecutor
6-
from typing import Dict, List, Optional, Tuple, Any, AsyncIterable, Callable, Union
6+
from typing import Dict, List, Optional, Tuple, Any, AsyncIterable, TextIO, Callable, Union
77
from transformers import (
88
AutoModelForCausalLM,
99
AutoTokenizer,
1010
PreTrainedModel,
1111
PreTrainedTokenizerBase,
1212
TextIteratorStreamer,
13+
BitsAndBytesConfig,
1314
)
1415
from app import __version__ as app_version
1516
from app.exception import ConfigurationException
1617
from app.model_services.base import AbstractModelService
18+
from app.trainers.huggingface_llm_trainer import HuggingFaceLlmSupervisedTrainer
1719
from app.domain import ModelCard, ModelType, Annotation
1820
from app.config import Settings
1921
from app.utils import (
@@ -123,13 +125,19 @@ def from_model(cls, model: PreTrainedModel, tokenizer: PreTrainedTokenizerBase)
123125
return model_service
124126

125127
@staticmethod
126-
def load_model(model_file_path: str, *args: Tuple, **kwargs: Dict[str, Any]) -> Tuple[PreTrainedModel, PreTrainedTokenizerBase]:
128+
def load_model(
129+
model_file_path: str,
130+
*args: Tuple,
131+
load_in_4bit: bool = False,
132+
**kwargs: Dict[str, Any]
133+
) -> Tuple[PreTrainedModel, PreTrainedTokenizerBase]:
127134
"""
128135
Loads a pre-trained model and its tokenizer from a model package file.
129136
130137
Args:
131138
model_file_path (str): The path to the model package file.
132139
*args (Tuple): Additional positional arguments.
140+
load_in_4bit (bool): Whether to load the model in 4-bit precision. Defaults to False.
133141
**kwargs (Dict[str, Any]): Additional keyword arguments.
134142
135143
Returns:
@@ -142,7 +150,16 @@ def load_model(model_file_path: str, *args: Tuple, **kwargs: Dict[str, Any]) ->
142150
model_path = os.path.join(os.path.dirname(model_file_path), get_model_data_package_base_name(model_file_path))
143151
if unpack_model_data_package(model_file_path, model_path):
144152
try:
145-
model = AutoModelForCausalLM.from_pretrained(model_path)
153+
if load_in_4bit:
154+
bnb_config = BitsAndBytesConfig(
155+
load_in_4bit=True,
156+
bnb_4bit_quant_type="nf4",
157+
bnb_4bit_compute_dtype=torch.bfloat16,
158+
bnb_4bit_use_double_quant=True,
159+
)
160+
model = AutoModelForCausalLM.from_pretrained(model_path, quantization_config=bnb_config)
161+
else:
162+
model = AutoModelForCausalLM.from_pretrained(model_path)
146163
ensure_tensor_contiguity(model)
147164
tokenizer = AutoTokenizer.from_pretrained(
148165
model_path,
@@ -172,7 +189,7 @@ def init_model(self) -> None:
172189
if non_default_device_is_available(get_settings().DEVICE):
173190
self._model.to(get_settings().DEVICE)
174191
if self._enable_trainer:
175-
logger.error("Trainers are not yet implemented for HuggingFace Generative models")
192+
self._supervised_trainer = HuggingFaceLlmSupervisedTrainer(self)
176193

177194
def info(self) -> ModelCard:
178195
"""
@@ -355,3 +372,49 @@ def create_embeddings(
355372

356373
results = embeddings.cpu().numpy().tolist()
357374
return results[0] if isinstance(text, str) else results
375+
376+
def train_supervised(
377+
self,
378+
data_file: TextIO,
379+
epochs: int,
380+
log_frequency: int,
381+
training_id: str,
382+
input_file_name: str,
383+
raw_data_files: Optional[List[TextIO]] = None,
384+
description: Optional[str] = None,
385+
synchronised: bool = False,
386+
**hyperparams: Dict[str, Any],
387+
) -> Tuple[bool, str, str]:
388+
"""
389+
Initiates supervised training on the model.
390+
391+
Args:
392+
data_file (TextIO): The file containing the trainer export data.
393+
epochs (int): The number of training epochs.
394+
log_frequency (int): The number of epochs after which training metrics will be logged.
395+
training_id (str): A unique identifier for the training process.
396+
input_file_name (str): The name of the input file to be logged.
397+
raw_data_files (Optional[List[TextIO]]): Additional raw data files to be logged. Defaults to None.
398+
description (Optional[str]): The description of the training or change logs. Defaults to None.
399+
synchronised (bool): Whether to wait for the training to complete.
400+
**hyperparams (Dict[str, Any]): Additional hyperparameters for training.
401+
402+
Returns:
403+
Tuple[bool, str, str]: A tuple with the first element indicating success or failure.
404+
405+
Raises:
406+
ConfigurationException: If the supervised trainer is not enabled.
407+
"""
408+
if self._supervised_trainer is None:
409+
raise ConfigurationException("The supervised trainer is not enabled")
410+
return self._supervised_trainer.train(
411+
data_file,
412+
epochs,
413+
log_frequency,
414+
training_id,
415+
input_file_name,
416+
raw_data_files,
417+
description,
418+
synchronised,
419+
**hyperparams,
420+
)

app/processors/metrics_collector.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,36 @@ def concat_trainer_exports(
194194
return combined
195195

196196

197+
def concat_json_lists(
198+
data_file_paths: List[str],
199+
combined_data_file_path: Optional[str] = None,
200+
) -> Union[List[Dict[str, Any]], str]:
201+
"""
202+
Concatenates multiple json list files into a single combined file.
203+
204+
Args:
205+
data_file_paths (List[str]): List of paths to files each containing a json list.
206+
combined_data_file_path (Optional[str]): The file path where the combined data will be saved. If None, the combined data will be returned as a list.
207+
208+
209+
Returns:
210+
Union[List[Dict[str, Any]], str]: The path to the combined data file if `combined_data_file_path` is provided, or the combined data as a list otherwise.
211+
"""
212+
combined: List = []
213+
for path in data_file_paths:
214+
with open(path, "r") as f:
215+
data = json.load(f)
216+
combined.extend(data)
217+
218+
if isinstance(combined_data_file_path, str):
219+
with open(combined_data_file_path, "w") as f:
220+
json.dump(combined, f)
221+
222+
return combined_data_file_path
223+
else:
224+
return combined
225+
226+
197227
def get_stats_from_trainer_export(
198228
trainer_export: Union[str, IO, Dict],
199229
return_df: bool = False,

0 commit comments

Comments
 (0)