Add tests for the Deepgram example

makseq · makseq · commit 4686f52c5732 · 2025-11-14T01:39:34.000Z
diff --git a/label_studio_ml/examples/deepgram/test_api.py b/label_studio_ml/examples/deepgram/test_api.py
@@ -10,38 +10,90 @@
 - Change the `request` and `expected_response` variables to match the input and output of your model.
 """
 
-import pytest
 import json
-from model import NewModel
+
+import pytest
+from label_studio_ml.response import ModelResponse
+from label_studio_sdk.label_interface.objects import PredictionValue
+from model import DeepgramModel
 
 
 @pytest.fixture
 def client():
     from _wsgi import init_app
-    app = init_app(model_class=NewModel)
+    app = init_app(model_class=DeepgramModel)  # app under test is built around the Deepgram model class
     app.config['TESTING'] = True
     with app.test_client() as client:
         yield client
 
 
-def test_predict(client):
+def test_predict(client, monkeypatch):
+    """
+    Scenario: exercise the /predict endpoint with minimal payload.
+    Steps   : patch DeepgramModel.setup to avoid env var requirements, POST minimal request.
+    Checks  : ensure HTTP 200 is returned with empty results when no context is provided.
+    """
+    # Patch setup to avoid requiring DEEPGRAM_API_KEY during model instantiation
+    monkeypatch.setattr(DeepgramModel, 'setup', lambda self: None)  # undone automatically at test teardown
+    
     request = {
         'tasks': [{
-            'data': {
-                # Your input test data here
-            }
+            'id': 1,
+            'data': {}  # intentionally empty: this request carries no context to act on
         }],
-        # Your labeling configuration here
-        'label_config': '<View></View>'
+        'label_config': '<View></View>',
+        'project': '1.1234567890'  # NOTE(review): looks like "<project id>.<timestamp>" — confirm format
     }
 
-    expected_response = {
-        'results': [{
-            # Your expected result here
+    response = client.post('/predict', data=json.dumps(request), content_type='application/json')
+    assert response.status_code == 200
+    body = json.loads(response.data)
+    assert 'results' in body
+    # When no context is provided, predict returns empty predictions
+    assert body['results'] == []
+
+
+def test_predict_endpoint_returns_stubbed_predictions(client, monkeypatch):
+    """
+    Scenario: exercise the /predict endpoint without hitting external services.
+    Steps   : patch DeepgramModel.setup and predict to avoid env vars and return stubbed data,
+              POST realistic payload to /predict, parse the JSON.
+    Checks  : ensure HTTP 200 is returned and the payload's `results` field matches the stub.
+    """
+    # Create a proper PredictionValue object with result structure
+    stub_prediction = PredictionValue(
+        result=[{
+            'from_name': 'text',
+            'to_name': 'audio',
+            'type': 'textarea',
+            'value': {'text': ['Hello from stub']}  # sentinel text asserted at the end of the test
         }]
+    )
+
+    # Patch setup to avoid requiring DEEPGRAM_API_KEY during model instantiation
+    monkeypatch.setattr(DeepgramModel, 'setup', lambda self: None)
+    
+    def fake_predict(self, tasks, context=None, **params):  # inputs are ignored; the stub is always returned
+        return ModelResponse(predictions=[stub_prediction])
+
+    monkeypatch.setattr(DeepgramModel, 'predict', fake_predict)  # class-level patch, so any instance uses it
+
+    request_payload = {
+        'tasks': [{
+            'id': 42,
+            'data': {'text': 'Sample request text'}
+        }],
+        'label_config': '<View><TextArea name="text" toName="audio"/></View>',
+        'project': '1.1234567890',  # NOTE(review): looks like "<project id>.<timestamp>" — confirm format
+        'params': {'context': {'result': []}}  # presumably forwarded to predict as `context` — TODO confirm
     }
 
-    response = client.post('/predict', data=json.dumps(request), content_type='application/json')
+    response = client.post('/predict', data=json.dumps(request_payload), content_type='application/json')
+
     assert response.status_code == 200
-    response = json.loads(response.data)
-    assert response == expected_response
+    body = json.loads(response.data)
+    # The API returns results which should contain the prediction's result
+    assert 'results' in body
+    assert len(body['results']) == 1  # exactly one entry, matching the single stubbed prediction
+    # Verify the structure matches what we stubbed
+    assert body['results'][0]['result'][0]['value']['text'] == ['Hello from stub']
diff --git a/label_studio_ml/examples/deepgram/test_model.py b/label_studio_ml/examples/deepgram/test_model.py
@@ -0,0 +1,180 @@
+import os
+from unittest.mock import MagicMock
+
+import pytest
+from label_studio_ml.response import ModelResponse
+
+# Set harmless Label Studio defaults BEFORE the model import below, so the SDK inside the Deepgram example sees them.
+os.environ.setdefault('LABEL_STUDIO_URL', 'http://localhost')  # setdefault keeps any value that is already set
+os.environ.setdefault('LABEL_STUDIO_API_KEY', 'test-token')  # placeholder token, not a real credential
+
+from label_studio_ml.examples.deepgram import model as deepgram_model  # noqa: E402
+
+
+@pytest.fixture
+def env_settings(monkeypatch):
+    """Provide the environment variables required by the Deepgram example."""
+    settings = {
+        'DEEPGRAM_API_KEY': 'dg-key',
+        'AWS_DEFAULT_REGION': 'us-east-1',
+        'S3_BUCKET': 'test-bucket',
+        'S3_FOLDER': 'tts',
+    }
+    for key, value in settings.items():
+        monkeypatch.setenv(key, value)  # monkeypatch restores the previous environment at teardown
+    return settings  # returned so tests can assert against the exact values that were exported
+
+
+@pytest.fixture
+def patched_clients(monkeypatch):
+    """Patch the Deepgram SDK, boto3 client, and Label Studio SDK with mocks."""
+    mock_deepgram_client = MagicMock(name='DeepgramClientInstance')
+    mock_deepgram_ctor = MagicMock(return_value=mock_deepgram_client)  # stands in for the DeepgramClient class
+    monkeypatch.setattr(deepgram_model, 'DeepgramClient', mock_deepgram_ctor)
+
+    mock_s3_client = MagicMock(name='S3Client')  # handed out by the patched boto3 `client` factory below
+    monkeypatch.setattr(deepgram_model.boto3, 'client', MagicMock(return_value=mock_s3_client))
+
+    mock_ls = MagicMock(name='LabelStudio')  # replaces the `ls` attribute of the model module
+    monkeypatch.setattr(deepgram_model, 'ls', mock_ls)
+
+    return {  # exposed so tests can configure and inspect each mock
+        'deepgram_client': mock_deepgram_client,
+        'deepgram_ctor': mock_deepgram_ctor,
+        's3_client': mock_s3_client,
+        'ls': mock_ls,
+    }
+
+
+def test_setup_raises_without_api_key(monkeypatch):
+    """
+    Scenario: setup is called without DEEPGRAM_API_KEY.
+    Steps   : remove the env var and instantiate the model (setup runs in __init__).
+    Checks  : verify ValueError is raised mentioning the missing key.
+    """
+    monkeypatch.delenv('DEEPGRAM_API_KEY', raising=False)  # raising=False: no error if it was never set
+
+    with pytest.raises(ValueError, match='DEEPGRAM_API_KEY'):  # `match` is a regex searched in the message
+        deepgram_model.DeepgramModel()
+
+
+def test_setup_initializes_clients_with_api_key(env_settings, patched_clients):
+    """
+    Scenario: setup receives valid env vars.
+    Steps   : call setup after patching external clients.
+    Checks  : ensure Deepgram & S3 clients plus region/bucket/folder are stored.
+    """
+    model = deepgram_model.DeepgramModel()
+    model.setup()  # NOTE(review): sibling tests state setup already runs in __init__ — likely redundant
+
+    assert patched_clients['deepgram_ctor'].called  # true after one or more constructor calls
+    assert model.deepgram_client is patched_clients['deepgram_client']
+    assert model.s3_client is patched_clients['s3_client']
+    assert model.s3_region == env_settings['AWS_DEFAULT_REGION']
+    assert model.s3_bucket == env_settings['S3_BUCKET']
+    assert model.s3_folder == env_settings['S3_FOLDER']
+
+
+def test_setup_falls_back_to_access_token(env_settings, patched_clients):
+    """
+    Scenario: the Deepgram SDK rejects the api_key kwarg.
+    Steps   : make the first constructor call raise TypeError, then succeed on retry.
+    Checks  : setup retries using access_token and keeps the final client (setup runs in __init__).
+    """
+    patched_clients['deepgram_ctor'].side_effect = [  # one item is consumed per constructor call
+        TypeError('unexpected kwarg'),  # exception instances in the list are raised
+        patched_clients['deepgram_client'],  # any other item is returned as the call result
+    ]
+    model = deepgram_model.DeepgramModel()
+
+    assert patched_clients['deepgram_ctor'].call_count == 2  # exactly one failed attempt plus one retry
+    first_call_kwargs = patched_clients['deepgram_ctor'].call_args_list[0].kwargs
+    second_call_kwargs = patched_clients['deepgram_ctor'].call_args_list[1].kwargs
+    assert 'api_key' in first_call_kwargs
+    assert 'access_token' in second_call_kwargs
+    assert model.deepgram_client is patched_clients['deepgram_client']
+
+
+def test_predict_no_context_returns_empty_modelresponse(env_settings, patched_clients):
+    """
+    Scenario: predict is invoked before the user submits any text.
+    Steps   : set up env vars and mocks, then call predict with empty context/result payloads.
+    Checks  : confirm an empty ModelResponse is returned immediately without calling external services.
+    """
+    model = deepgram_model.DeepgramModel()
+    tasks = [{'id': 1}]
+
+    response = model.predict(tasks=tasks, context=None)  # context=None models "nothing submitted yet"
+
+    assert isinstance(response, ModelResponse)
+    assert response.predictions == []
+    # Verify no external calls were made
+    patched_clients['deepgram_client'].speak.v1.audio.generate.assert_not_called()
+    patched_clients['s3_client'].upload_file.assert_not_called()
+
+
+def test_predict_generates_audio_uploads_to_s3_and_updates_task(env_settings, patched_clients):
+    """
+    Scenario: predict handles a happy path request.
+    Steps   : mock Deepgram audio chunks, S3 upload, and Label Studio update.
+    Checks  : verify Deepgram is called, S3 upload args are correct, ls.tasks.update
+              receives the S3 URL, and the temporary file is deleted.
+    """
+    patched_clients['deepgram_client'].speak.v1.audio.generate.return_value = [b'chunk-a', b'chunk-b']
+    model = deepgram_model.DeepgramModel()
+    model.setup()  # NOTE(review): sibling tests state setup already runs in __init__ — likely redundant
+
+    tasks = [{'id': 123}]
+    context = {
+        'user_id': 'user-7',
+        'result': [{'value': {'text': ['Hello Deepgram']}}],  # presumably the text the user submitted
+    }
+
+    model.predict(tasks=tasks, context=context)
+
+    patched_clients['deepgram_client'].speak.v1.audio.generate.assert_called_once_with(text='Hello Deepgram')
+    assert patched_clients['s3_client'].upload_file.call_count == 1
+
+    upload_args = patched_clients['s3_client'].upload_file.call_args.kwargs  # keyword args of the upload call
+    local_path = patched_clients['s3_client'].upload_file.call_args.args[0]  # 1st positional: local file path
+    assert upload_args['ExtraArgs']['ContentType'] == 'audio/mpeg'
+    assert upload_args['ExtraArgs']['ACL'] == 'public-read'
+    assert upload_args['ExtraArgs']['CacheControl'].startswith('public')
+
+    expected_key = f"{env_settings['S3_FOLDER']}/123_user-7.mp3"  # "<folder>/<task id>_<user id>.mp3"
+    assert patched_clients['s3_client'].upload_file.call_args.args[2] == expected_key  # 3rd positional: key
+
+    expected_url = f"https://{env_settings['S3_BUCKET']}.s3.{env_settings['AWS_DEFAULT_REGION']}.amazonaws.com/{expected_key}"
+    patched_clients['ls'].tasks.update.assert_called_once_with(
+        id=123,
+        data={'text': 'Hello Deepgram', 'audio': expected_url},
+    )
+
+    assert not os.path.exists(local_path)  # the temporary audio file must be gone once predict returns
+
+
+def test_predict_s3_failure_raises_and_cleans_up_temp_file(env_settings, patched_clients):
+    """
+    Scenario: the S3 upload raises an exception.
+    Steps   : let Deepgram produce chunks, force upload_file to fail.
+    Checks  : ensure the exception bubbles up, temp file is removed, and Label Studio
+              is never updated.
+    """
+    patched_clients['deepgram_client'].speak.v1.audio.generate.return_value = [b'chunk']
+    patched_clients['s3_client'].upload_file.side_effect = RuntimeError('s3 boom')  # raised on every call
+    model = deepgram_model.DeepgramModel()
+    model.setup()  # NOTE(review): sibling tests state setup already runs in __init__ — likely redundant
+
+    tasks = [{'id': 999}]
+    context = {
+        'user_id': 'user-1',
+        'result': [{'value': {'text': ['Explode']}}],
+    }
+
+    with pytest.raises(RuntimeError, match='s3 boom'):
+        model.predict(tasks=tasks, context=context)
+
+    local_path = patched_clients['s3_client'].upload_file.call_args.args[0]  # call is recorded even though it raised
+    assert not os.path.exists(local_path)
+    patched_clients['ls'].tasks.update.assert_not_called()
+