Merge branch 'fix-o2m-id-field-handling-rebased3' of github.com:OdooDataFlow/odoo-data-flow into fix-o2m-id-field-handling-rebased3

bosd · bosd · commit 290c891e896c · 2025-09-30T22:50:13.000+02:00
diff --git a/src/odoo_data_flow/import_threaded.py b/src/odoo_data_flow/import_threaded.py
@@ -888,7 +888,13 @@ def _execute_load_batch(  # noqa: C901
                 f"Batch {batch_number}: Fail mode active, using `create` method."
             )
         result = _create_batch_individually(
-            model, batch_lines, batch_header, uid_index, context, ignore_list, progress
+            model,
+            batch_lines,
+            batch_header,
+            uid_index,
+            context,
+            ignore_list,
+            progress,
         )
         result["success"] = bool(result.get("id_map"))
         return result
@@ -979,6 +985,14 @@ def _execute_load_batch(  # noqa: C901
                     f"{load_lines[0][:10] if load_lines and load_lines[0] else []}"
                     "Model has no _fields attribute, using raw values for load method"
                 )
+                # Even when model has no _fields, we still need to sanitize the
+                # unique ID field to prevent XML ID constraint violations.
+                for row in load_lines:
+                    # This is more efficient than a nested list comprehension as
+                    # it modifies the list in-place and only targets the
+                    # required cell.
+                    if uid_index < len(row) and row[uid_index] is not None:
+                        row[uid_index] = to_xmlid(str(row[uid_index]))
         try:
             log.debug(f"Attempting `load` for chunk of batch {batch_number}...")
 
@@ -1039,7 +1053,11 @@ def _execute_load_batch(  # noqa: C901
                 # to fail file
                 error_msg = res["messages"][0].get("message", "Batch load failed.")
                 log.error(f"Capturing load failure for fail file: {error_msg}")
-                # We'll add the failed lines to aggregated_failed_lines at the end
+                # Add all current chunk records to failed lines since there are
+                # error messages
+                for line in current_chunk:
+                    failed_line = [*line, f"Load failed: {error_msg}"]
+                    aggregated_failed_lines.append(failed_line)
 
             # Use sanitized IDs for the id_map to match what was actually sent to Odoo
             id_map = {
@@ -1063,7 +1081,26 @@ def _execute_load_batch(  # noqa: C901
             successful_count = len(created_ids)
             total_count = len(load_lines)
 
-            if successful_count < total_count:
+            # If there are error messages from Odoo, all records in chunk
+            # should be marked as failed
+            if res.get("messages"):
+                # NOTE(review): this only recomputes the two counts assigned
+                # just above; it marks nothing as failed and looks redundant.
+                successful_count = len(created_ids)
+                total_count = len(load_lines)
+
+            # If there are error messages from Odoo, all records in chunk should
+            # be marked as failed
+            if res.get("messages"):
+                # All records in the chunk are considered failed due to
+                # error messages
+                log.info(
+                    f"All {len(current_chunk)} records in chunk marked as "
+                    f"failed due to error messages"
+                )
+                # Don't add them again since they were already added in the
+                # earlier block
+            elif successful_count < total_count:
                 failed_count = total_count - successful_count
                 log.info(f"Capturing {failed_count} failed records for fail file")
                 # Add error information to the lines that failed
@@ -1072,8 +1109,6 @@ def _execute_load_batch(  # noqa: C901
                     if i >= len(created_ids) or created_ids[i] is None:
                         # This record failed, add it to failed_lines with error info
                         error_msg = "Record creation failed"
-                        if res.get("messages"):
-                            error_msg = res["messages"][0].get("message", error_msg)
 
                         failed_line = [*list(line), f"Load failed: {error_msg}"]
                         aggregated_failed_lines.append(failed_line)
@@ -1170,6 +1205,14 @@ def _execute_load_batch(  # noqa: C901
                 continue
 
             # For all other exceptions, use the original scalable error detection
+            # Also check for constraint violations which should be treated as
+            # non-scalable
+            is_constraint_violation = (
+                "constraint" in error_str
+                or "violation" in error_str
+                or "not-null constraint" in error_str
+                or "mandatory field" in error_str
+            )
             is_scalable_error = (
                 "memory" in error_str
                 or "out of memory" in error_str
@@ -1184,6 +1227,24 @@ def _execute_load_batch(  # noqa: C901
                 or "poolerror" in error_str.lower()
             )
 
+            # Handle constraint violations separately - these are data issues,
+            # not scalable issues
+            if is_constraint_violation:
+                # Constraint violations are data problems, add all records to
+                # failed lines
+                clean_error = str(e).strip().replace("\\n", " ")
+                log.error(
+                    f"Constraint violation in batch {batch_number}: {clean_error}"
+                )
+                error_msg = f"Constraint violation: {clean_error}"
+
+                for line in current_chunk:
+                    failed_line = [*line, error_msg]
+                    aggregated_failed_lines.append(failed_line)
+
+                lines_to_process = lines_to_process[chunk_size:]
+                continue
+
             if is_scalable_error and chunk_size > 1:
                 chunk_size = max(1, chunk_size // 2)
                 progress.console.print(
diff --git a/tests/test_import_threaded.py b/tests/test_import_threaded.py
@@ -17,6 +17,7 @@
     _setup_fail_file,
     import_data,
 )
+from odoo_data_flow.lib.internal.tools import to_xmlid
 
 
 class TestImportData:
@@ -220,7 +221,12 @@ def test_batch_scales_down_on_gateway_error(
             "ignore_list": [],
         }
         batch_header = ["id", "name"]
-        batch_lines = [["rec1", "A"], ["rec2", "B"], ["rec3", "C"], ["rec4", "D"]]
+        batch_lines = [
+            ["rec1", "A"],
+            ["rec2", "B"],
+            ["rec3", "C"],
+            ["rec4", "D"],
+        ]
 
         # Set up the return value for the mocked function before the call
         mock_create_individually.return_value = {
@@ -415,7 +421,10 @@ def test_pass_2_handles_failed_batch(self, mock_run_pass: MagicMock) -> None:
                 (2, {"parent_id": 101}, "Access Error"),
             ],
         }
-        mock_run_pass.return_value = (failed_write_result, False)  # result, aborted
+        mock_run_pass.return_value = (
+            failed_write_result,
+            False,
+        )  # result, aborted
 
         # Act
         with Progress() as progress:
@@ -538,7 +547,10 @@ def test_run_threaded_pass_keyboard_interrupt(
         self, mock_as_completed: MagicMock
     ) -> None:
         """Test that a KeyboardInterrupt is handled gracefully."""
-        from odoo_data_flow.import_threaded import RPCThreadImport, _run_threaded_pass
+        from odoo_data_flow.import_threaded import (
+            RPCThreadImport,
+            _run_threaded_pass,
+        )
 
         rpc_thread = RPCThreadImport(1, Progress(), MagicMock())
         rpc_thread.task_id = rpc_thread.progress.add_task("test")
@@ -743,3 +755,94 @@ def test_execute_load_batch_successfully_aggregates_all_records() -> None:
     assert result["id_map"]["rec4"] == 4
     # Should have no failed lines
     assert len(result["failed_lines"]) == 0
+
+
+def test_execute_load_batch_sanitizes_ids_when_model_has_no_fields() -> None:
+    """Test that unique ID values are sanitized when the model has no _fields."""
+    mock_model = MagicMock()
+    # Model exposes no field metadata: _fields is set to None
+    mock_model._fields = None
+    mock_model.load.return_value = {"ids": [1, 2]}
+    mock_progress = MagicMock()
+    thread_state = {
+        "model": mock_model,
+        "progress": mock_progress,
+        "unique_id_field_index": 0,  # Index of the ID column
+        "ignore_list": [],
+    }
+    batch_header = ["id", "name"]
+    # IDs with spaces that should be sanitized
+    batch_lines = [
+        ["product_template_2023_02_08 09_45_32_0001", "Product 1"],
+        ["another id with spaces", "Product 2"],
+    ]
+
+    from odoo_data_flow.import_threaded import _execute_load_batch
+    from odoo_data_flow.lib.internal.tools import to_xmlid
+
+    # Call the function
+    result = _execute_load_batch(thread_state, batch_lines, batch_header, 1)
+
+    # Verify that model.load was called with properly sanitized IDs
+    # The call_args should show that the IDs were sanitized
+    # (spaces replaced with underscores)
+    call_args = mock_model.load.call_args
+    sent_header, sent_data = call_args[0]  # Get the positional arguments
+
+    # Verify header is unchanged
+    assert sent_header == batch_header
+    # Verify that the IDs in the data have been sanitized
+    assert sent_data[0][0] == to_xmlid(
+        "product_template_2023_02_08 09_45_32_0001"
+    )  # Should be sanitized
+    assert sent_data[1][0] == to_xmlid("another id with spaces")  # Should be sanitized
+
+    # Verify the id_map uses the sanitized IDs
+    expected_id1 = to_xmlid("product_template_2023_02_08 09_45_32_0001")
+    expected_id2 = to_xmlid("another id with spaces")
+    assert result["id_map"][expected_id1] == 1
+    assert result["id_map"][expected_id2] == 2
+
+
+def test_execute_load_batch_sanitizes_ids_in_model_fields_case() -> None:
+    """Test that unique ID values are sanitized when the model defines _fields."""
+    mock_model = MagicMock()
+    # Model has _fields attribute (like normal Odoo models)
+    mock_model._fields = {"id": {"type": "char"}, "name": {"type": "char"}}
+    mock_model.load.return_value = {"ids": [1, 2]}
+    mock_progress = MagicMock()
+    thread_state = {
+        "model": mock_model,
+        "progress": mock_progress,
+        "unique_id_field_index": 0,  # Index of the ID column
+        "ignore_list": [],
+    }
+    batch_header = ["id", "name"]
+    # IDs with spaces that should be sanitized
+    batch_lines = [
+        ["product_template_2023_02_08 09_45_32_0003", "Product 1"],
+        ["different id with spaces", "Product 2"],
+    ]
+
+    # Call the function
+    result = _execute_load_batch(thread_state, batch_lines, batch_header, 1)
+
+    # Verify that model.load was called with properly sanitized IDs
+    call_args = mock_model.load.call_args
+    sent_header, sent_data = call_args[0]  # Get the positional arguments
+
+    # Verify header is unchanged
+    assert sent_header == batch_header
+    # Verify that the IDs in the data have been sanitized
+    assert sent_data[0][0] == to_xmlid(
+        "product_template_2023_02_08 09_45_32_0003"
+    )  # Should be sanitized
+    assert sent_data[1][0] == to_xmlid(
+        "different id with spaces"
+    )  # Should be sanitized
+
+    # Verify the id_map uses the sanitized IDs
+    expected_id1 = to_xmlid("product_template_2023_02_08 09_45_32_0003")
+    expected_id2 = to_xmlid("different id with spaces")
+    assert result["id_map"][expected_id1] == 1
+    assert result["id_map"][expected_id2] == 2