Commits (89)
41b8688
Create readme.md
SuchethaChintha Nov 20, 2025
683948c
Add files via upload
SuchethaChintha Nov 20, 2025
fc02d47
Fix source directory path in conftest.py
SuchethaChintha Nov 20, 2025
a944f78
Update import paths in test_api_server.py
SuchethaChintha Nov 20, 2025
8d6373f
Update import paths in test_hf_engine.py
SuchethaChintha Nov 20, 2025
8a26f61
Update import paths for BaseEngine methods
SuchethaChintha Nov 20, 2025
5fed1fa
Update test_mii_engine_v2.py
SuchethaChintha Nov 20, 2025
34a1bc0
Update import paths in test_request_adapter.py
SuchethaChintha Nov 20, 2025
171fd85
Update patches to use context for vllm_engine tests
SuchethaChintha Nov 20, 2025
3e42bbe
Update conftest.py
SuchethaChintha Nov 20, 2025
ea60a63
Add copyright and license comments to test_api_server.py
SuchethaChintha Nov 20, 2025
42d1654
Update test_conversation.py
SuchethaChintha Nov 20, 2025
6a02085
Add copyright and license information to test file
SuchethaChintha Nov 20, 2025
1ca5e10
Update test_hf_engine.py
SuchethaChintha Nov 20, 2025
ef96760
Update test_managed_inference.py
SuchethaChintha Nov 20, 2025
e0b9e50
Update test_mii_engine.py
SuchethaChintha Nov 20, 2025
f7ed6e5
Update test_mii_engine_v2.py
SuchethaChintha Nov 20, 2025
7d21d2e
Add copyright and license to test_model_config_factory.py
SuchethaChintha Nov 20, 2025
239b0aa
Add copyright and license comments to test_model_utils.py
SuchethaChintha Nov 20, 2025
e9a0d78
Add copyright and license comments to test file
SuchethaChintha Nov 20, 2025
a7bc263
Add copyright and license to test_request_adapter.py
SuchethaChintha Nov 20, 2025
cda9e4a
Update test_utils.py
SuchethaChintha Nov 20, 2025
0f8836f
Add copyright and license headers to test_vllm_engine.py
SuchethaChintha Nov 20, 2025
2549353
Update conftest.py
SuchethaChintha Nov 21, 2025
bfb87b2
Remove unused pkgutil import from conftest.py
SuchethaChintha Nov 21, 2025
34062bb
Fix formatting in test_api_server.py
SuchethaChintha Nov 21, 2025
1981e93
Update test_api_server.py
SuchethaChintha Nov 21, 2025
ecb6c78
Add files via upload
SuchethaChintha Nov 21, 2025
4736c86
Add files via upload
SuchethaChintha Nov 21, 2025
273a9a3
Add files via upload
SuchethaChintha Nov 21, 2025
23f32c0
Update test_api_server.py
SuchethaChintha Nov 21, 2025
7f0d600
Refactor test_conversation.py for clarity and structure
SuchethaChintha Nov 21, 2025
72d3800
Update test_fm_score.py
SuchethaChintha Nov 21, 2025
ffa6c11
Refactor test_hf_engine.py for consistency
SuchethaChintha Nov 21, 2025
0c624a1
Update test_managed_inference.py
SuchethaChintha Nov 21, 2025
8f0d984
Update test_mii_engine.py
SuchethaChintha Nov 21, 2025
a09953c
Refactor test_mii_engine_v2.py with updated imports
SuchethaChintha Nov 21, 2025
9c1f91b
Refactor test_model_config_factory.py for clarity
SuchethaChintha Nov 21, 2025
47f7fd1
Refactor test_model_utils.py for consistency
SuchethaChintha Nov 21, 2025
6a928da
Refactor test_replica_manager.py for clarity
SuchethaChintha Nov 21, 2025
cc85d39
Update test_request_adapter.py
SuchethaChintha Nov 21, 2025
b20a991
Update test_utils.py
SuchethaChintha Nov 21, 2025
20c6af7
Remove merge conflict markers and clean up code
SuchethaChintha Nov 21, 2025
5cf48c3
Update test_api_server.py
SuchethaChintha Nov 21, 2025
6df28f6
Update test_vllm_engine.py
SuchethaChintha Nov 21, 2025
a4381a0
Update test_api_server.py
SuchethaChintha Nov 21, 2025
21d55fd
Refactor test_conversation.py for clarity and structure
SuchethaChintha Nov 21, 2025
4394d4d
Update test_fm_score.py
SuchethaChintha Nov 21, 2025
8d8dae2
Remove duplicate code in test_hf_engine.py
SuchethaChintha Nov 21, 2025
01eb291
Update print statement from 'Hello' to 'Goodbye'
SuchethaChintha Nov 21, 2025
cd7d034
Update test_mii_engine_v2.py
SuchethaChintha Nov 21, 2025
daf9454
Remove duplicate code in test_model_utils.py
SuchethaChintha Nov 21, 2025
9d6edee
Update test_replica_manager.py
SuchethaChintha Nov 21, 2025
f222c47
Refactor test_request_adapter.py for consistency
SuchethaChintha Nov 21, 2025
e811003
Refactor test_utils.py to use pytest
SuchethaChintha Nov 21, 2025
da2e0b9
Update test_api_server.py
SuchethaChintha Nov 21, 2025
5ec086d
Update test_utils.py
SuchethaChintha Nov 21, 2025
2b35dda
Refactor test_mii_engine.py with consistent formatting
SuchethaChintha Nov 21, 2025
e1d4d2b
Update test_model_config_factory.py
SuchethaChintha Nov 21, 2025
473e589
Refactor test_utils.py with consistent formatting
SuchethaChintha Nov 21, 2025
d69474b
Update test_api_server.py
SuchethaChintha Nov 24, 2025
a3e2033
Update test_api_server.py
SuchethaChintha Nov 24, 2025
3d69126
Update test_api_server.py
SuchethaChintha Nov 24, 2025
b187583
Update print statement from 'Hello' to 'Goodbye'
SuchethaChintha Nov 24, 2025
ee2e40a
Update test_api_server.py
SuchethaChintha Nov 24, 2025
b0b0d05
Update test_api_server.py
SuchethaChintha Nov 24, 2025
cc6c69d
Update test_api_server.py
SuchethaChintha Nov 24, 2025
7e628b6
Update test_managed_inference.py
SuchethaChintha Nov 24, 2025
ac6c4e9
Update test_api_server.py
SuchethaChintha Nov 25, 2025
e5840ee
Refactor test_managed_inference.py for clarity
SuchethaChintha Nov 25, 2025
54a624f
Update test_api_server.py
SuchethaChintha Nov 25, 2025
6e87283
Update test_managed_inference.py
SuchethaChintha Nov 25, 2025
8253ce0
Update test_managed_inference.py
SuchethaChintha Nov 25, 2025
6c9ca9b
Update test_managed_inference.py
SuchethaChintha Nov 25, 2025
939c439
Update test_managed_inference.py
SuchethaChintha Nov 25, 2025
29d4336
Update test_mii_engine.py
SuchethaChintha Nov 25, 2025
a4aff5f
Update test_mii_engine.py
SuchethaChintha Nov 25, 2025
43d4530
Update test_request_adapter.py
SuchethaChintha Nov 25, 2025
809c0c8
Update test_utils.py
SuchethaChintha Nov 25, 2025
e5276e5
Update test_managed_inference.py
SuchethaChintha Nov 25, 2025
07196d7
Update test_managed_inference.py
SuchethaChintha Nov 25, 2025
232b455
Update test_request_adapter.py
SuchethaChintha Nov 25, 2025
722643d
Update test_utils.py
SuchethaChintha Nov 25, 2025
22dc58d
Update test_managed_inference.py
SuchethaChintha Nov 25, 2025
d76aba2
Update test_managed_inference.py
SuchethaChintha Nov 25, 2025
5814cff
Update test_managed_inference.py
SuchethaChintha Nov 25, 2025
99624b4
Update test_managed_inference.py
SuchethaChintha Nov 25, 2025
3f0c83d
Update test_managed_inference.py
SuchethaChintha Nov 25, 2025
f460fe4
Update test_managed_inference.py
SuchethaChintha Nov 25, 2025
conftest.py
@@ -0,0 +1,15 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Model package containing model serving functionality."""
import os
import sys


def pytest_configure(config):
current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.dirname(current_directory)
src_directory = os.path.join(parent_directory, "context", "foundation", "model", "serve")

# Need to add src directory to the path to enable discovery of src files by the test directory
sys.path.append(parent_directory)
sys.path.append(src_directory)
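For illustration only (not part of the changed files), a minimal sketch of what this path setup enables, using module names that appear in the tests below: once pytest_configure has run, the serve sources can be imported either by bare module name (via src_directory) or by full package path (via parent_directory).

    # Illustration only: assumes the sys.path entries added by conftest.py above.
    # Bare-name import, resolved from the serve directory:
    from conversation import Conversation, Role, TextMessage

    # Full package path, resolved from the parent directory (hypothetical module path):
    # from context.foundation.model.serve.conversation import Conversation

    conv = Conversation([TextMessage(Role.USER, "Quick import sanity check.")])
    print(conv.to_json())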
readme.md
@@ -0,0 +1,44 @@
# Running Unit Tests Locally
In order to run unit tests locally, first ensure you have the requirements defined by requirements.txt in the tests directory installed. If not, follow the steps below. Note that not all tests will be runnable locally: any test that imports mii or vllm cannot run on a local Windows OS, as those packages are not compatible with Windows. To run all tests, check out a Compute Instance and install the necessary packages there.
1. cd into the tests directory.
2. Run pip install -r requirements.txt in your conda environment. This installs the requirements needed to run pytest.
3. If you open a test file such as test_conversation.py and see a yellow squiggly line under *conversation* in the line *from conversation import ...*, hover over the word *conversation*. A popup will offer *View Problem* and *Quick Fix*. Select *Quick Fix*, then choose *Add ./fm-inference/src to ExtraPaths*.
4. If you have trouble with step 3, go to the .vscode folder instead. It should contain a settings.json file (if not, create one). If you created a new file, copy in:

    {
        "python.linting.flake8Args": [
            "--max-line-length=119",
            "--ignore=W503"
        ],
        "python.testing.unittestEnabled": false,
        "python.testing.pytestEnabled": true,
        "python.analysis.extraPaths": [
            "./fm-inference/src"
        ]
    }

If the directory already has a settings.json file, simply add

    "python.analysis.extraPaths": [
        "./fm-inference/src"
    ]

to the file.

5. Now open a VS Code terminal and activate your conda environment. Run pytest *PATH_TO_TEST_FILE* and the unit tests should run (a programmatic equivalent is sketched just below this list).
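As referenced in step 5, the same run can be done programmatically from Python instead of the shell. A minimal sketch (the paths are examples based on the layout described above; adjust them to your checkout):

    # Minimal sketch: run one test file with pytest from Python.
    # The sys.path line mirrors the "python.analysis.extraPaths" setting above.
    import sys

    sys.path.insert(0, "./fm-inference/src")

    import pytest

    # Example path; point this at whichever test file you want to run.
    exit_code = pytest.main(["fm-inference/tests/test_conversation.py", "-v"])
    print("pytest exit code:", exit_code)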

## Optional: Built-in VS Code Testing
To use the built-in VS Code testing, do the following:
1. Follow steps 1-4 above.
2. Press *Ctrl + Shift + P*. A search bar should appear.
3. Type *Configure Tests*, then select *pytest*, and then *fm-inference*. This starts the test discovery process.
4. Click the test flask icon in the left panel of VS Code. If test discovery was successful, a dropdown menu will be available. Keep expanding the dropdown menus until you reach the unit test file you want to run, then click the play icon.


### Failures

#### Conda Environment
If there is a failure, also ensure the correct conda environment is selected. To change it, press *Ctrl + Shift + P* and search for *Select Interpreter*, then choose the Python interpreter associated with your conda environment and rerun *Configure Tests*.

#### Test Discovery
If test discovery was successful, a dropdown menu will appear with foundation-models-inference at the top, followed by fm-inference, then tests. You can see this by clicking the arrows on the left-hand side of the titles. Clicking the play icon for the foundation-models-inference menu will not run the tests; to actually run tests, you must click the play icon on the fm-inference menu or any submenu below it.
requirements.txt
@@ -0,0 +1,25 @@
torch==2.8.0
deepspeed-mii==0.3.3
pandas==2.1.4
transformers==4.55.2
vllm==0.11.0
aiolimiter==1.1.1
azure-ai-ml==1.13.0
azureml-inference-server-http==1.4.1
azureml-mlflow==1.60.0
azureml-ai-monitoring==1.0.0
azure-identity==1.23.0
azure-ai-contentsafety==1.0.0b1
certifi==2025.7.14
requests==2.32.4
aiohttp==3.10.11
pillow==11.3.0
httpx==0.28.1
fastapi==0.120.4
uvicorn==0.35.0
scipy==1.15.3
accelerate==1.9.0
pytest==8.4.1
pytest-asyncio==1.1.0
ruamel.yaml==0.15.42
h2==4.3.0

Large diffs are not rendered by default.

test_conversation.py
@@ -0,0 +1,85 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import unittest
from conversation import Conversation, Role, TextMessage, MultimodalMessage, MultimodalContent


class TestConversationModel(unittest.TestCase):
def test_empty_conversation(self):
# Create an empty conversation and serialize to JSON
conv = Conversation()
json_str = conv.to_json()
self.assertEqual(json_str, "[]")

# Deserialize back to an empty Conversation object
new_conv = Conversation.from_json(json_str)
self.assertEqual(len(new_conv.messages), 0)

def test_first_message_validation(self):
with self.assertRaises(ValueError):
Conversation([TextMessage(Role.ASSISTANT, "This should fail.")])

try:
Conversation([TextMessage(Role.SYSTEM, "This is valid.")])
except ValueError:
self.fail("Conversation initialization failed when it should have succeeded.")

try:
Conversation([TextMessage(Role.USER, "This is valid.")])
except ValueError:
self.fail("Conversation initialization failed when it should have succeeded.")

def test_add_message_validation(self):
# Create empty conversation
conv = Conversation()

with self.assertRaises(ValueError):
conv.add_message(TextMessage(Role.ASSISTANT, "This should fail."))

conv.add_message(TextMessage(Role.SYSTEM, "This is valid."))

try:
conv.add_message(TextMessage(Role.USER, "This is valid."))
conv.add_message(TextMessage(Role.ASSISTANT, "This is valid."))
except ValueError:
self.fail("add_message failed when it should have succeeded.")

def test_serialization_deserialization(self):
# Create a conversation and serialize it
conv = Conversation([
TextMessage(Role.SYSTEM, "You are a helpful assistant."),
TextMessage(Role.USER, "Who won the world series in 2020?"),
TextMessage(Role.ASSISTANT, "The Los Angeles Dodgers won the World Series in 2020."),
])
json_str = conv.to_json()

# Deserialize back to a Conversation object
new_conv = Conversation.from_json(json_str)

# Check if the deserialized conversation is the same as the original
for orig_msg, new_msg in zip(conv.messages, new_conv.messages):
self.assertEqual(orig_msg.role, new_msg.role)
self.assertEqual(orig_msg.content, new_msg.content)

def test_multimodal_message(self):
multimodal_msg = MultimodalMessage(
role=Role.USER,
content=[
MultimodalContent(type="text", text="Show me an image of a cat"),
MultimodalContent(type="image_url", image_url={"url": "https://example.com/cat.jpg"}),
],
)

conv = Conversation([TextMessage(Role.SYSTEM, "I can show images."), multimodal_msg])
json_str = conv.to_json()
new_conv = Conversation.from_json(json_str)

self.assertEqual(len(new_conv.messages), 2)
self.assertIsInstance(new_conv.messages[1], MultimodalMessage)
self.assertEqual(new_conv.messages[1].content[0].text, "Show me an image of a cat")
self.assertEqual(new_conv.messages[1].content[1].image_url["url"], "https://example.com/cat.jpg")


if __name__ == '__main__':
unittest.main()
test_fm_score.py
@@ -0,0 +1,91 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import unittest
from unittest.mock import MagicMock, patch

from configs import EngineConfig
from configs import TaskConfig
from constants import TaskType
from fm_score import FMScore
from fm_score import get_formatter
from managed_inference import MIRPayload


class TestFMScore(unittest.TestCase):
def setUp(self):
self.sample_config = {
'engine': {
'engine_name': 'mii',
'model_id': 'Llama2',
'tokenizer': 'tokenizer',
'mii_config': {
'deployment_name': 'sample_deployment',
'mii_configs': {},
'ds_config': None,
'ds_zero': False,
},
},
'task': {
'task_type': TaskType.TEXT_GENERATION,
},
}

@patch('fm_score.get_formatter')
@patch('fm_score.ReplicaManager')
def test_init(self, mock_replica_manager, mock_get_formatter):
mock_formatter = MagicMock()
mock_replica_manager_instance = MagicMock()
mock_replica_manager.return_value = mock_replica_manager_instance
mock_get_formatter.return_value = mock_formatter

fms = FMScore(self.sample_config)
fms.init()

self.assertEqual(fms.engine_config, EngineConfig.from_dict(self.sample_config['engine']))
self.assertEqual(fms.task_config, TaskConfig.from_dict(self.sample_config['task']))
mock_get_formatter.assert_called_once_with(model_name='Llama2')
self.assertEqual(fms.formatter, mock_formatter)
self.assertEqual(fms.replica_manager, mock_replica_manager_instance)

@patch('fm_score.get_formatter')
@patch('fm_score.ReplicaManager')
def test_run(self, mock_replica_manager, mock_get_formatter):
mock_formatter = MagicMock()
mock_replica_manager_instance = MagicMock()
mock_replica_manager.return_value = mock_replica_manager_instance
mock_get_formatter.return_value = mock_formatter

fms = FMScore(self.sample_config)
fms.init()

payload = MIRPayload('Today is a wonderful day to ', {'max_length': 128}, fms.task_config.task_type, True)
payload.convert_query_to_list()
output = fms.run(payload)

mock_formatter.format_prompt.assert_called_once_with(
fms.task_config.task_type,
'Today is a wonderful day to ', {'max_length': 128},
)
mock_replica_manager_instance.get_replica().generate.assert_called_once_with(
[mock_formatter.format_prompt.return_value], {'max_length': 128}
)
self.assertEqual(output, mock_replica_manager_instance.get_replica().generate.return_value)

def test_invalid_model(self):
with self.assertRaises(ValueError):
get_formatter('invalid_model')

@patch('fm_score.ReplicaManager')
def test_initialize_formatter_from_custom_model_config_builder(self, mock_replica_manager):
custom_model_config_builder = MagicMock()
custom_model_config_builder.get_formatter = MagicMock(return_value='custom formatter')
self.sample_config["engine"]["custom_model_config_builder"] = custom_model_config_builder
self.sample_config["task"]["task_type"] = TaskType.TEXT_TO_IMAGE_INPAINTING
fms = FMScore(self.sample_config)
fms.init()
assert fms.formatter == 'custom formatter'


if __name__ == '__main__':
unittest.main()
test_hf_engine.py
@@ -0,0 +1,51 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import unittest
from context.foundation.model.serve.engine.hf_engine import HfEngine
from context.foundation.model.serve.configs import EngineConfig, TaskConfig
from context.foundation.model.serve.constants import TaskType


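# Note: these tests load real Hugging Face checkpoints (bert-base-cased and
# distilbert-base-cased-distilled-squad), so the first run needs network access
# or a pre-populated local model cache.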
class TestHfEngine(unittest.TestCase):
fill_mask_task_config = TaskConfig(task_type=TaskType.FILL_MASK)
qna_task_config = TaskConfig(task_type=TaskType.QnA)

def test_fill_mask(self):
ml_model_info = {
"task": "fill-mask",
"hf_tokenizer_class": "BertTokenizerFast",
"hf_pretrained_class": "BertForMaskedLM"
}
engine_config = EngineConfig(engine_name="hf", model_id="bert-base-cased",
tokenizer="bert-base-cased", hf_config_path="bert-base-cased",
ml_model_info=ml_model_info)
engine = HfEngine(engine_config, self.fill_mask_task_config)
test_tokens = engine.generate(["[MASK] is the capital of France."], params={})
self.assertIsNotNone(test_tokens)

def test_question_answering(self):
ml_model_info = {
"task": "question-answering",
"hf_tokenizer_class": "DistilBertTokenizerFast",
"hf_pretrained_class": "DistilBertForQuestionAnswering"
}
engine_config = EngineConfig(engine_name="hf", model_id="distilbert-base-cased-distilled-squad",
tokenizer="distilbert-base-cased-distilled-squad",
hf_config_path="distilbert-base-cased-distilled-squad",
ml_model_info=ml_model_info)
engine = HfEngine(engine_config, self.qna_task_config)
context = """
Extractive Question Answering is the task of extracting an answer from a text given a question.
An example of a question answering dataset is the SQuAD dataset,
which is entirely based on that task.
If you would like to fine-tune a model on a SQuAD task,
you may leverage the examples/pytorch/question-answering/run_squad.py script.
"""
test_tokens = engine.generate([{"question": "What is a good example of a question answering dataset?",
"context": context}], params={})
self.assertIsNotNone(test_tokens)


if __name__ == "__main__":
unittest.main()