Merge pull request #1089 from NASA-IMPACT/1088-api-tests-for-token-verification-request-accuracy-response-parsing-and-error-handling

CarsonDavis · web-flow · commit af5e77454b62 · 2025-01-31T03:33:08.000Z
API Tests for Token Verification, Request Accuracy, Response Parsing, and Error Handling
diff --git a/sde_collections/tests/test_sinequa_api.py b/sde_collections/tests/test_sinequa_api.py
@@ -1,4 +1,5 @@
 # docker-compose -f local.yml run --rm django pytest sde_collections/tests/test_sinequa_api.py
+import json
 from unittest.mock import MagicMock, patch
 
 import pytest
@@ -12,6 +13,12 @@
 
 @pytest.mark.django_db
 class TestApiClass:
+    """
+    Test suite for the Sinequa API integration.
+    Tests cover authentication, query construction, response processing,
+    and error handling across different server configurations.
+    """
+
     @pytest.fixture
     def collection(self):
         """Fixture to create a collection object for testing."""
@@ -25,7 +32,10 @@ def collection(self):
 
     @pytest.fixture
     def api_instance(self):
-        """Fixture to create an Api instance with mocked server configs."""
+        """
+        Fixture to create an Api instance with mocked server configs.
+        Provides a consistent test environment with predefined credentials.
+        """
         with patch(
             "sde_collections.sinequa_api.server_configs",
             {
@@ -41,35 +51,71 @@ def api_instance(self):
 
     @patch("requests.post")
     def test_process_response_success(self, mock_post, api_instance):
-        """Test that process_response handles successful responses."""
+        """
+        Test that process_response successfully handles and parses API responses.
+        Verifies:
+        1. Correct HTTP request processing
+        2. JSON response parsing
+        3. Return value structure
+        """
         mock_response = MagicMock()
         mock_response.status_code = 200
         mock_response.json.return_value = {"key": "value"}
         mock_post.return_value = mock_response
 
         response = api_instance.process_response("http://example.com", payload={"test": "data"})
         assert response == {"key": "value"}
+        mock_post.assert_called_once()
 
     @patch("requests.post")
     def test_process_response_failure(self, mock_post, api_instance):
-        """Test that process_response raises an exception on failure."""
+        """
+        Test that process_response properly handles failed API requests.
+        Verifies appropriate exception raising and error messaging.
+        """
         mock_response = MagicMock()
         mock_response.status_code = 500
         mock_post.return_value = mock_response
-        mock_response.raise_for_status.side_effect = Exception("Internal Server Error")
+        mock_response.raise_for_status.side_effect = requests.RequestException("Internal Server Error")
 
-        with pytest.raises(Exception, match="Internal Server Error"):
+        with pytest.raises(requests.RequestException, match="Internal Server Error"):
             api_instance.process_response("http://example.com", payload={"test": "data"})
 
+    def test_missing_token_for_sql_query(self, api_instance):
+        """
+        Running a SQL query after the token has been cleared must raise ValueError.
+        """
+        sql = "SELECT * FROM test"
+        api_instance._provided_token = None
+        with pytest.raises(ValueError, match="Token is required"):
+            api_instance._execute_sql_query(sql)
+
     @patch("sde_collections.sinequa_api.Api.process_response")
     def test_query(self, mock_process_response, api_instance):
-        """Test that query sends correct payload and processes response."""
+        """
+        Test that query method:
+        1. Constructs the correct URL and payload based on input parameters
+        2. Processes API response correctly
+        3. Returns expected data structure
+        """
         mock_process_response.return_value = {"result": "success"}
         response = api_instance.query(page=1, collection_config_folder="folder")
         assert response == {"result": "success"}
 
+        # Verify payload construction
+        mock_process_response.assert_called_once()
+        call_args = mock_process_response.call_args
+        assert "folder" in str(call_args)  # Verify collection folder is included
+        assert "page" in str(call_args)  # Verify pagination parameters
+
     def test_process_rows_to_records(self, api_instance):
-        """Test processing row data into record dictionaries."""
+        """
+        Test processing of raw SQL row data into structured record dictionaries.
+        Verifies:
+        1. Correct parsing of valid input data
+        2. Error handling for malformed rows
+        3. Output format consistency
+        """
         # Test valid input
         valid_rows = [["http://example.com/1", "Text 1", "Title 1"], ["http://example.com/2", "Text 2", "Title 2"]]
         expected_output = [
@@ -85,7 +131,13 @@ def test_process_rows_to_records(self, api_instance):
 
     @patch("sde_collections.sinequa_api.Api.process_response")
     def test_execute_sql_query(self, mock_process_response, api_instance):
-        """Test SQL query execution."""
+        """
+        Test SQL query execution with token-based authentication.
+        Verifies:
+        1. Query construction
+        2. Token validation
+        3. Response processing
+        """
         mock_process_response.return_value = {"Rows": [], "TotalRowCount": 0}
 
         # Test successful query
@@ -99,7 +151,13 @@ def test_execute_sql_query(self, mock_process_response, api_instance):
 
     @patch("sde_collections.sinequa_api.Api._execute_sql_query")
     def test_get_full_texts_pagination(self, mock_execute_sql, api_instance):
-        """Test that get_full_texts correctly handles pagination."""
+        """
+        Test pagination handling in get_full_texts method.
+        Verifies:
+        1. Correct batch processing
+        2. Accurate record counting
+        3. Proper iteration termination
+        """
         # Mock responses for two pages of results
         mock_execute_sql.side_effect = [
             {
@@ -117,17 +175,18 @@ def test_get_full_texts_pagination(self, mock_execute_sql, api_instance):
         assert len(batches[0]) == 2  # First batch has 2 records
         assert len(batches[1]) == 1  # Second batch has 1 record
 
-        # Verify content of first batch
+        # Verify content of batches
         assert batches[0] == [
             {"url": "http://example.com/1", "full_text": "Text 1", "title": "Title 1"},
             {"url": "http://example.com/2", "full_text": "Text 2", "title": "Title 2"},
         ]
-
-        # Verify content of second batch
         assert batches[1] == [{"url": "http://example.com/3", "full_text": "Text 3", "title": "Title 3"}]
 
     def test_get_full_texts_missing_index(self, api_instance):
-        """Test that get_full_texts raises error when index is missing from config."""
+        """
+        Test error handling when index configuration is missing.
+        Verifies appropriate error message and exception type.
+        """
         api_instance.config.pop("index", None)
         with pytest.raises(ValueError, match="Index not defined for server"):
             next(api_instance.get_full_texts("test_folder"))
@@ -141,7 +200,13 @@ def test_get_full_texts_missing_index(self, api_instance):
     )
     @patch("requests.post")
     def test_query_authentication(self, mock_post, server_name, expect_auth, api_instance):
-        """Test authentication handling for different server types."""
+        """
+        Test authentication handling for different server types.
+        Verifies:
+        1. Dev servers require authentication
+        2. Production servers skip authentication
+        3. Correct credential handling
+        """
         api_instance.server_name = server_name
         mock_post.return_value = MagicMock(status_code=200, json=lambda: {"result": "success"})
 
@@ -154,7 +219,10 @@ def test_query_authentication(self, mock_post, server_name, expect_auth, api_ins
 
     @patch("requests.post")
     def test_query_dev_server_missing_credentials(self, mock_post, api_instance):
-        """Test that dev servers raise error when credentials are missing."""
+        """
+        Test error handling for dev servers with missing credentials.
+        Verifies appropriate error messages and authentication requirements.
+        """
         api_instance.server_name = "xli"
         api_instance._provided_user = None
         api_instance._provided_password = None
@@ -164,7 +232,13 @@ def test_query_dev_server_missing_credentials(self, mock_post, api_instance):
 
     @patch("sde_collections.sinequa_api.Api._execute_sql_query")
     def test_get_full_texts_batch_size_reduction(self, mock_execute_sql, api_instance):
-        """Test that batch size reduces appropriately on failure and continues processing."""
+        """
+        Test batch size reduction logic when queries fail.
+        Verifies:
+        1. Progressive batch size reduction
+        2. Retry mechanism
+        3. Successful recovery
+        """
         # Mock first query to fail, then succeed with smaller batch
         mock_execute_sql.side_effect = [
             requests.RequestException("Query too large"),  # First attempt fails
@@ -181,7 +255,7 @@ def test_get_full_texts_batch_size_reduction(self, mock_execute_sql, api_instanc
         assert len(batches[0]) == 1
         assert batches[0][0]["url"] == "http://example.com/1"
 
-        # Verify the calls made - first with original size, then with reduced size
+        # Verify batch size reduction logic
         assert mock_execute_sql.call_count == 2
         first_call = mock_execute_sql.call_args_list[0][0][0]
         second_call = mock_execute_sql.call_args_list[1][0][0]
@@ -190,24 +264,88 @@ def test_get_full_texts_batch_size_reduction(self, mock_execute_sql, api_instanc
 
     @patch("sde_collections.sinequa_api.Api._execute_sql_query")
     def test_get_full_texts_minimum_batch_size(self, mock_execute_sql, api_instance):
-        """Test behavior when reaching minimum batch size."""
+        """
+        Test behavior when reaching minimum batch size.
+        Verifies error handling at minimum batch size threshold.
+        """
         mock_execute_sql.side_effect = requests.RequestException("Query failed")
 
         # Start with batch_size=4, min_batch_size=1
-        # Should try: 4 -> 2 -> 1 -> raise error
         with pytest.raises(ValueError, match="Failed to process batch even at minimum size 1"):
             list(api_instance.get_full_texts("test_folder", batch_size=4, min_batch_size=1))
 
-        # Should have tried 3 times before giving up
+        # Verify retry attempts
         assert mock_execute_sql.call_count == 3
         calls = mock_execute_sql.call_args_list
         assert "COUNT 4" in calls[0][0][0]  # First try with 4
         assert "COUNT 2" in calls[1][0][0]  # Second try with 2
         assert "COUNT 1" in calls[2][0][0]  # Final try with 1
 
+    @patch("requests.post")
+    def test_sql_query_construction(self, mock_post, api_instance):
+        """
+        Test the HTTP request that _execute_sql_query sends through requests.post.
+        Verifies:
+        1. The target URL contains the "engine.sql" endpoint
+        2. The JSON request body (sent as ``data=``) carries the SQL text unchanged
+        3. The Authorization header is "Bearer test_token"
+        """
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {"Rows": [["http://example.com", "sample text", "sample title"]]}
+        mock_post.return_value = mock_response
+
+        sql = "SELECT url1, text, title FROM test_index WHERE collection = '/SDE/sample_folder/'"
+        api_instance._execute_sql_query(sql)
+
+        # Exactly one HTTP request should have been issued
+        mock_post.assert_called_once()
+        args, kwargs = mock_post.call_args
+
+        # Fail with a clear message instead of parsing a "{}" placeholder and
+        # tripping over an unrelated KeyError on "sql" further down
+        assert "data" in kwargs, "request body was not passed to requests.post as data="
+        payload = json.loads(kwargs["data"])
+
+        assert "engine.sql" in args[0]  # Verify endpoint
+        assert kwargs["headers"]["Authorization"] == "Bearer test_token"  # Verify token usage
+        assert payload["sql"] == sql  # Verify SQL query inclusion
+
+    def test_process_full_text_response(self, api_instance):
+        """
+        Exercise Api._process_full_text_response, called on the class itself.
+        Checks:
+        1. Each [url, full_text, title] row becomes a dict with those keys
+        2. Row order is preserved in the returned list
+        3. A response without a "Rows" key raises ValueError
+        """
+        rows = [
+            ["http://example.com/article1", "Full text 1", "Title 1"],
+            ["http://example.com/article2", "Full text 2", "Title 2"],
+        ]
+
+        expected = [
+            {"url": "http://example.com/article1", "full_text": "Full text 1", "title": "Title 1"},
+            {"url": "http://example.com/article2", "full_text": "Full text 2", "title": "Title 2"},
+        ]
+
+        # Well-formed response: rows are converted one-to-one
+        result = Api._process_full_text_response({"Rows": rows})
+        assert result == expected
+
+        # Malformed response: the "Rows" key is absent
+        with pytest.raises(ValueError, match="Invalid response format"):
+            Api._process_full_text_response({"wrong_key": []})
+
     @patch("sde_collections.sinequa_api.Api._execute_sql_query")
     def test_get_full_texts_batch_size_progression(self, mock_execute_sql, api_instance):
-        """Test multiple batch size reductions followed by successful query."""
+        """
+        Test multiple batch size reductions followed by successful query.
+        Verifies:
+        1. Progressive batch size reduction steps
+        2. Recovery after multiple failures
+        3. Final successful query execution
+        """
         mock_execute_sql.side_effect = [
             requests.RequestException("First failure"),
             requests.RequestException("Second failure"),