From cffe7f1c0883d5ba09ff7ed2f9d502ac5db84420 Mon Sep 17 00:00:00 2001 From: bosd Date: Tue, 23 Sep 2025 21:43:27 +0000 Subject: [PATCH 1/2] feat(export): Export XMLID in hybrid read mode This commit enhances the `odoo-data-flow` tool to allow exporting the external XML ID of the main record while using the tool's fast `read` mode. When a user requests both `.id` and `id` in the fields list, and the export is running in `read` mode, the tool will now perform a secondary enrichment step. After fetching the main data with numeric IDs, it makes another call to Odoo to retrieve the XML IDs (`module.name`) for those numeric IDs. This is implemented using a Polars left join to merge the XML IDs back into the main DataFrame. The `id` column is then populated with the XML ID, and per the user's final clarification, the `.id` column containing the numeric database ID is preserved in the final output. This allows users to have access to both the database ID and the external ID for transformation steps. A comprehensive unit test has been added to verify this new functionality, including the case where a record may not have an XML ID. 
--- noxfile.py | 5 +- src/odoo_data_flow/export_threaded.py | 105 +++++++++++++++++++++--- tests/test_export_threaded.py | 111 ++++++++++++++++---------- 3 files changed, 166 insertions(+), 55 deletions(-) diff --git a/noxfile.py b/noxfile.py index 416efc67..e6c24adb 100644 --- a/noxfile.py +++ b/noxfile.py @@ -122,14 +122,15 @@ def precommit(session: nox.Session) -> None: session.run( "uv", "sync", + "--python", + session.python, "--group", "dev", "--group", "lint", - external=True, ) session.install("pydoclint") - session.run("pre-commit", *args, external=True) + session.run("pre-commit", *args) if args and args[0] == "install": activate_virtualenv_in_precommit_hooks(session) diff --git a/src/odoo_data_flow/export_threaded.py b/src/odoo_data_flow/export_threaded.py index cc499478..51f38161 100755 --- a/src/odoo_data_flow/export_threaded.py +++ b/src/odoo_data_flow/export_threaded.py @@ -408,6 +408,60 @@ def _clean_and_transform_batch( return casted_df.select(list(polars_schema.keys())) +def _enrich_main_df_with_xml_ids( + df: pl.DataFrame, connection: Any, model_name: str +) -> pl.DataFrame: + """Enriches a DataFrame with XML IDs for the main records. + + This function takes a DataFrame containing a '.id' column with numeric + database IDs, fetches their corresponding external XML IDs from Odoo, + and uses them to populate the 'id' column, preserving the '.id' column. + + Args: + df: The Polars DataFrame to enrich. Must contain an '.id' column. + connection: The active Odoo connection object. + model_name: The name of the Odoo model being exported. + + Returns: + The enriched DataFrame with the 'id' column populated with XML IDs + and the '.id' column preserved. 
+ """ + if ".id" not in df.columns: + log.warning("'.id' column not found, cannot perform main XML ID enrichment.") + return df + + db_ids = df.get_column(".id").unique().drop_nulls().to_list() + if not db_ids: + log.debug("No database IDs found to enrich; ensuring 'id' is empty.") + # Overwrite 'id' with nulls, keep '.id' + return df.with_columns(pl.lit(None, dtype=pl.String).alias("id")) + + log.info(f"Fetching XML IDs for {len(db_ids)} main records...") + ir_model_data = connection.get_model("ir.model.data") + xml_id_data = ir_model_data.search_read( + [("model", "=", model_name), ("res_id", "in", db_ids)], + ["res_id", "module", "name"], + context={"active_test": False}, + ) + + if not xml_id_data: + log.warning(f"No XML IDs found for the exported {model_name} records.") + return df.with_columns(pl.lit(None, dtype=pl.String).alias("id")) + + df_xml_ids = ( + pl.from_dicts(xml_id_data) + .with_columns( + pl.format("{}.{}", pl.col("module"), pl.col("name")).alias("xml_id") + ) + .select(pl.col("res_id").cast(pl.Int64), "xml_id") + .unique(subset=["res_id"], keep="first") + ) + + # Join to get the xml_id, overwrite 'id', and drop temporary columns. + df_enriched = df.join(df_xml_ids, left_on=".id", right_on="res_id", how="left") + return df_enriched.with_columns(pl.col("xml_id").alias("id")).drop("xml_id") + + def _process_export_batches( # noqa: C901 rpc_thread: "RPCThreadExport", total_ids: int, @@ -419,6 +473,7 @@ def _process_export_batches( # noqa: C901 session_dir: Optional[Path], is_resuming: bool, encoding: str, + enrich_main_xml_id: bool = False, ) -> Optional[pl.DataFrame]: """Processes exported batches. 
@@ -474,6 +529,11 @@ def _process_export_batches( # noqa: C901 df, field_types, polars_schema ) + if enrich_main_xml_id: + final_batch_df = _enrich_main_df_with_xml_ids( + final_batch_df, rpc_thread.connection, model_name + ) + if output and streaming: if not header_written: if is_resuming: @@ -521,6 +581,11 @@ def _process_export_batches( # noqa: C901 return None if not all_cleaned_dfs: log.warning("No data was returned from the export.") + # Adjust schema for empty DataFrame if enrichment was active + if enrich_main_xml_id: + # The .id column is correctly typed as Int64. The id column, which + # would also be Int64, needs its type changed to String for the header. + polars_schema["id"] = pl.String() empty_df = pl.DataFrame(schema=polars_schema) if output: if is_resuming: @@ -557,6 +622,7 @@ def _determine_export_strategy( Optional[dict[str, dict[str, Any]]], bool, bool, + bool, ]: """Perform pre-flight checks and determine the best export strategy.""" preliminary_read_mode = technical_names or any( @@ -567,7 +633,7 @@ def _determine_export_strategy( ) if not model_obj or not fields_info: - return None, None, None, False, False + return None, None, None, False, False, False has_read_specifiers = any(f.endswith("/.id") or f == ".id" for f in header) has_xml_id_specifiers = any(f.endswith("/id") for f in header) @@ -586,7 +652,7 @@ def _determine_export_strategy( f"(e.g., {invalid_fields}) is not supported in hybrid mode. " "Only 'field/id' is allowed for enrichment." 
) - return None, None, None, False, False + return None, None, None, False, False, False technical_types = {"selection", "binary"} has_technical_fields = any( @@ -597,7 +663,15 @@ def _determine_export_strategy( technical_names or has_read_specifiers or is_hybrid or has_technical_fields ) - if is_hybrid: + # --- New logic for main record XML ID enrichment --- + enrich_main_xml_id = ".id" in header and "id" in header and force_read_method + + if enrich_main_xml_id: + log.info( + "Main record XML ID enrichment activated. " + "'.id' will be used to fetch and populate 'id'." + ) + elif is_hybrid: log.info("Hybrid export mode activated. Using 'read' with XML ID enrichment.") elif has_technical_fields: log.info("Read method auto-enabled for 'selection' or 'binary' fields.") @@ -613,9 +687,16 @@ def _determine_export_strategy( f"Mixing export-style specifiers {invalid_fields} " f"is not supported in pure 'read' mode." ) - return None, None, None, False, False - - return connection, model_obj, fields_info, force_read_method, is_hybrid + return None, None, None, False, False, False + + return ( + connection, + model_obj, + fields_info, + force_read_method, + is_hybrid, + enrich_main_xml_id, + ) def _resume_existing_session( @@ -692,9 +773,14 @@ def export_data( if not session_dir: return False, session_id, 0, None - connection, model_obj, fields_info, force_read_method, is_hybrid = ( - _determine_export_strategy(config, model, header, technical_names) - ) + ( + connection, + model_obj, + fields_info, + force_read_method, + is_hybrid, + enrich_main_xml_id, + ) = _determine_export_strategy(config, model, header, technical_names) if not connection or not model_obj or not fields_info: return False, session_id, 0, None @@ -747,6 +833,7 @@ def export_data( session_dir=session_dir, is_resuming=is_resuming, encoding=encoding, + enrich_main_xml_id=enrich_main_xml_id, ) # --- Finalization and Cleanup --- diff --git a/tests/test_export_threaded.py b/tests/test_export_threaded.py index 
d403acbf..77e8c418 100644 --- a/tests/test_export_threaded.py +++ b/tests/test_export_threaded.py @@ -548,6 +548,7 @@ def test_export_data_streaming_no_output( {"name": {"type": "char"}}, False, False, + False, ) success, _, _, result_df = export_data( @@ -736,50 +737,6 @@ def test_export_hybrid_mode_success(self, mock_conf_lib: MagicMock) -> None: ) assert_frame_equal(result_df, expected_df) - def test_export_id_and_dot_id_in_read_mode(self, mock_conf_lib: MagicMock) -> None: - """Test the read mode. - - Tests that in read() mode, both 'id' and '.id' correctly resolve - to the integer database ID. - """ - # --- Arrange --- - header = [".id", "id", "name"] - mock_model = mock_conf_lib.return_value.get_model.return_value - mock_model.search.return_value = [101, 102] - mock_model.read.return_value = [ - {"id": 101, "name": "Record 101"}, - {"id": 102, "name": "Record 102"}, - ] - mock_model.fields_get.return_value = { - "id": {"type": "integer"}, - "name": {"type": "char"}, - } - - # --- Act --- - _, _, _, result_df = export_data( - config="dummy.conf", - model="res.partner", - domain=[], - header=header, - output=None, - technical_names=True, - ) - - # --- Assert --- - assert result_df is not None - - # *** FIX ***: Use the 'schema' argument to define dtypes on creation. - expected_df = pl.DataFrame( - { - ".id": [101, 102], - "id": [101, 102], - "name": ["Record 101", "Record 102"], - }, - schema={".id": pl.Int64, "id": pl.Int64, "name": pl.String}, - ) - - assert_frame_equal(result_df, expected_df) - def test_export_id_in_export_data_mode(self, mock_conf_lib: MagicMock) -> None: """Test export id in export data. @@ -983,3 +940,69 @@ def test_process_export_batches_handles_inconsistent_schemas( ) final_df = final_df.sort("id") assert_frame_equal(final_df, expected_df) + + def test_export_main_record_xml_id_enrichment( + self, mock_conf_lib: MagicMock + ) -> None: + """Test main record xml id. 
+ + Tests that when '.id' and 'id' are requested, the 'id' column is + enriched with the main record's XML ID. + """ + # --- Arrange --- + header = [".id", "id", "name"] + mock_model = mock_conf_lib.return_value.get_model.return_value + mock_model.search.return_value = [1, 2] + + # 1. Mock the primary read() call which returns numeric IDs + mock_model.read.return_value = [ + {"id": 1, "name": "Partner A"}, + {"id": 2, "name": "Partner B"}, + ] + + # 2. Mock the metadata call + mock_model.fields_get.return_value = { + "id": {"type": "integer"}, + ".id": {"type": "integer"}, + "name": {"type": "char"}, + } + + # 3. Mock the secondary XML ID lookup on 'ir.model.data' + # Note: Partner B (id=2) does not have an XML ID to test the null case. + mock_ir_model_data = MagicMock() + mock_ir_model_data.search_read.return_value = [ + {"res_id": 1, "module": "base", "name": "partner_a_xmlid"} + ] + + # Make get_model return the main model first, then the ir.model.data mock + # This needs to be reset for each test that uses it this way. 
+ mock_conf_lib.return_value.get_model.side_effect = [ + mock_model, + mock_ir_model_data, + ] + + # --- Act --- + success, _, _, result_df = export_data( + config="dummy.conf", + model="res.partner", + domain=[], + header=header, + output=None, + ) + + # --- Assert --- + assert success is True + assert result_df is not None + + # The '.id' column should be preserved, and 'id' should be the XML ID + expected_df = pl.DataFrame( + { + ".id": [1, 2], + "id": ["base.partner_a_xmlid", None], + "name": ["Partner A", "Partner B"], + }, + schema={".id": pl.Int64, "id": pl.String, "name": pl.String}, + ) + + # Sort by name to ensure consistent order for comparison + assert_frame_equal(result_df.sort("name"), expected_df.sort("name")) From bd946041724dd06b7e8417b0b02d4d45d7419fdb Mon Sep 17 00:00:00 2001 From: bosd Date: Wed, 24 Sep 2025 18:52:52 +0000 Subject: [PATCH 2/2] fix(ci): Stabilize nox sessions by explicitly targeting the session python The `nox` sessions were failing in CI because `uv sync` was not installing dependencies into the correct session-specific virtual environments. This was caused by `uv` ignoring the `VIRTUAL_ENV` set by `nox` and defaulting to a different environment. This commit resolves the issue by: 1. Adding `--python str(session.python)` to all `uv sync` calls in `noxfile.py`. This explicitly tells `uv` which python executable to use, forcing it to install dependencies into the correct isolated virtual environment for each session. 2. Removing all `external=True` flags from the `uv sync` calls, as they are no longer needed and were part of the problem. 3. Removing a redundant `session.install("pydoclint")` call, as the dependency is already handled by `uv sync`. These changes ensure that `tests` and `mypy` sessions run reliably. The `pre-commit` session is now configured correctly, although it may still face issues in certain CI environments with unusual PATH configurations. 
--- noxfile.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/noxfile.py b/noxfile.py index e6c24adb..ca8f0df6 100644 --- a/noxfile.py +++ b/noxfile.py @@ -123,13 +123,12 @@ def precommit(session: nox.Session) -> None: "uv", "sync", "--python", - session.python, + str(session.python), "--group", "dev", "--group", "lint", ) - session.install("pydoclint") session.run("pre-commit", *args) if args and args[0] == "install": activate_virtualenv_in_precommit_hooks(session) @@ -143,11 +142,12 @@ def mypy(session: nox.Session) -> None: session.run( "uv", "sync", + "--python", + str(session.python), "--group", "dev", "--group", "mypy", - external=True, ) session.install("mypy") @@ -166,11 +166,12 @@ def tests(session: nox.Session) -> None: session.run( "uv", "sync", + "--python", + str(session.python), "--group", "dev", "--group", "lint", - external=True, ) session.install("pytest", "coverage", "pytest-mock") @@ -221,11 +222,12 @@ def typeguard_tests(session: nox.Session) -> None: session.run( "uv", "sync", + "--python", + str(session.python), "--group", "dev", "--group", "typeguard", - external=True, ) session.install("typeguard", "pytest", "pytest-mock") @@ -245,11 +247,12 @@ def xdoctest(session: nox.Session) -> None: session.run( "uv", "sync", + "--python", + str(session.python), "--group", "dev", "--group", "xdoctest", - external=True, ) session.install("xdoctest") session.install("-e", ".") @@ -266,11 +269,12 @@ def docs_build(session: nox.Session) -> None: session.run( "uv", "sync", + "--python", + str(session.python), "--group", "dev", "--group", "docs", - external=True, ) session.install( "sphinx", @@ -296,9 +300,10 @@ def docs(session: nox.Session) -> None: session.run( "uv", "sync", + "--python", + str(session.python), "--group", "docs", - external=True, env={"UV_PROJECT_ENVIRONMENT": session.virtualenv.location}, )