Skip to content

Commit e94cc81

Browse files
committed
feat(export): Export XMLID in hybrid read mode
This commit enhances the `odoo-data-flow` tool so that it can export the external XML ID of the main record while using the tool's fast `read` mode. When a user requests both `.id` and `id` in the fields list and the export is running in `read` mode, the tool now performs a secondary enrichment step: after fetching the main data with numeric IDs, it makes a second call to Odoo (a `search_read` on `ir.model.data`) to retrieve the XML IDs (`module.name`) for those numeric IDs. The XML IDs are merged back into the main DataFrame with a Polars left join. The `id` column is then populated with the XML ID, while the `.id` column containing the numeric database ID is preserved in the final output, as explicitly requested. This gives users access to both the database ID and the external ID for transformation steps. A comprehensive unit test has been added to verify this new functionality, including the case where a record has no XML ID.
1 parent 05b0553 commit e94cc81

File tree

3 files changed

+166
-55
lines changed

3 files changed

+166
-55
lines changed

noxfile.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,14 +122,15 @@ def precommit(session: nox.Session) -> None:
122122
session.run(
123123
"uv",
124124
"sync",
125+
"--python",
126+
session.python,
125127
"--group",
126128
"dev",
127129
"--group",
128130
"lint",
129-
external=True,
130131
)
131132
session.install("pydoclint")
132-
session.run("pre-commit", *args, external=True)
133+
session.run("pre-commit", *args)
133134
if args and args[0] == "install":
134135
activate_virtualenv_in_precommit_hooks(session)
135136

src/odoo_data_flow/export_threaded.py

Lines changed: 96 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,60 @@ def _clean_and_transform_batch(
408408
return casted_df.select(list(polars_schema.keys()))
409409

410410

411+
def _enrich_main_df_with_xml_ids(
412+
df: pl.DataFrame, connection: Any, model_name: str
413+
) -> pl.DataFrame:
414+
"""Enriches a DataFrame with XML IDs for the main records.
415+
416+
This function takes a DataFrame containing a '.id' column with numeric
417+
database IDs, fetches their corresponding external XML IDs from Odoo,
418+
and uses them to populate the 'id' column, preserving the '.id' column.
419+
420+
Args:
421+
df: The Polars DataFrame to enrich. Must contain an '.id' column.
422+
connection: The active Odoo connection object.
423+
model_name: The name of the Odoo model being exported.
424+
425+
Returns:
426+
The enriched DataFrame with the 'id' column populated with XML IDs
427+
and the '.id' column preserved.
428+
"""
429+
if ".id" not in df.columns:
430+
log.warning("'.id' column not found, cannot perform main XML ID enrichment.")
431+
return df
432+
433+
db_ids = df.get_column(".id").unique().drop_nulls().to_list()
434+
if not db_ids:
435+
log.debug("No database IDs found to enrich; ensuring 'id' is empty.")
436+
# Overwrite 'id' with nulls, keep '.id'
437+
return df.with_columns(pl.lit(None, dtype=pl.String).alias("id"))
438+
439+
log.info(f"Fetching XML IDs for {len(db_ids)} main records...")
440+
ir_model_data = connection.get_model("ir.model.data")
441+
xml_id_data = ir_model_data.search_read(
442+
[("model", "=", model_name), ("res_id", "in", db_ids)],
443+
["res_id", "module", "name"],
444+
context={"active_test": False},
445+
)
446+
447+
if not xml_id_data:
448+
log.warning(f"No XML IDs found for the exported {model_name} records.")
449+
return df.with_columns(pl.lit(None, dtype=pl.String).alias("id"))
450+
451+
df_xml_ids = (
452+
pl.from_dicts(xml_id_data)
453+
.with_columns(
454+
pl.format("{}.{}", pl.col("module"), pl.col("name")).alias("xml_id")
455+
)
456+
.select(pl.col("res_id").cast(pl.Int64), "xml_id")
457+
.unique(subset=["res_id"], keep="first")
458+
)
459+
460+
# Join to get the xml_id, overwrite 'id', and drop temporary columns.
461+
df_enriched = df.join(df_xml_ids, left_on=".id", right_on="res_id", how="left")
462+
return df_enriched.with_columns(pl.col("xml_id").alias("id")).drop("xml_id")
463+
464+
411465
def _process_export_batches( # noqa: C901
412466
rpc_thread: "RPCThreadExport",
413467
total_ids: int,
@@ -419,6 +473,7 @@ def _process_export_batches( # noqa: C901
419473
session_dir: Optional[Path],
420474
is_resuming: bool,
421475
encoding: str,
476+
enrich_main_xml_id: bool = False,
422477
) -> Optional[pl.DataFrame]:
423478
"""Processes exported batches.
424479
@@ -474,6 +529,11 @@ def _process_export_batches( # noqa: C901
474529
df, field_types, polars_schema
475530
)
476531

532+
if enrich_main_xml_id:
533+
final_batch_df = _enrich_main_df_with_xml_ids(
534+
final_batch_df, rpc_thread.connection, model_name
535+
)
536+
477537
if output and streaming:
478538
if not header_written:
479539
if is_resuming:
@@ -521,6 +581,11 @@ def _process_export_batches( # noqa: C901
521581
return None
522582
if not all_cleaned_dfs:
523583
log.warning("No data was returned from the export.")
584+
# Adjust schema for empty DataFrame if enrichment was active
585+
if enrich_main_xml_id:
586+
# The .id column is correctly typed as Int64. The id column, which
587+
# would also be Int64, needs its type changed to String for the header.
588+
polars_schema["id"] = pl.String()
524589
empty_df = pl.DataFrame(schema=polars_schema)
525590
if output:
526591
if is_resuming:
@@ -557,6 +622,7 @@ def _determine_export_strategy(
557622
Optional[dict[str, dict[str, Any]]],
558623
bool,
559624
bool,
625+
bool,
560626
]:
561627
"""Perform pre-flight checks and determine the best export strategy."""
562628
preliminary_read_mode = technical_names or any(
@@ -567,7 +633,7 @@ def _determine_export_strategy(
567633
)
568634

569635
if not model_obj or not fields_info:
570-
return None, None, None, False, False
636+
return None, None, None, False, False, False
571637

572638
has_read_specifiers = any(f.endswith("/.id") or f == ".id" for f in header)
573639
has_xml_id_specifiers = any(f.endswith("/id") for f in header)
@@ -586,7 +652,7 @@ def _determine_export_strategy(
586652
f"(e.g., {invalid_fields}) is not supported in hybrid mode. "
587653
"Only 'field/id' is allowed for enrichment."
588654
)
589-
return None, None, None, False, False
655+
return None, None, None, False, False, False
590656

591657
technical_types = {"selection", "binary"}
592658
has_technical_fields = any(
@@ -597,7 +663,15 @@ def _determine_export_strategy(
597663
technical_names or has_read_specifiers or is_hybrid or has_technical_fields
598664
)
599665

600-
if is_hybrid:
666+
# --- New logic for main record XML ID enrichment ---
667+
enrich_main_xml_id = ".id" in header and "id" in header and force_read_method
668+
669+
if enrich_main_xml_id:
670+
log.info(
671+
"Main record XML ID enrichment activated. "
672+
"'.id' will be used to fetch and populate 'id'."
673+
)
674+
elif is_hybrid:
601675
log.info("Hybrid export mode activated. Using 'read' with XML ID enrichment.")
602676
elif has_technical_fields:
603677
log.info("Read method auto-enabled for 'selection' or 'binary' fields.")
@@ -613,9 +687,16 @@ def _determine_export_strategy(
613687
f"Mixing export-style specifiers {invalid_fields} "
614688
f"is not supported in pure 'read' mode."
615689
)
616-
return None, None, None, False, False
617-
618-
return connection, model_obj, fields_info, force_read_method, is_hybrid
690+
return None, None, None, False, False, False
691+
692+
return (
693+
connection,
694+
model_obj,
695+
fields_info,
696+
force_read_method,
697+
is_hybrid,
698+
enrich_main_xml_id,
699+
)
619700

620701

621702
def _resume_existing_session(
@@ -692,9 +773,14 @@ def export_data(
692773
if not session_dir:
693774
return False, session_id, 0, None
694775

695-
connection, model_obj, fields_info, force_read_method, is_hybrid = (
696-
_determine_export_strategy(config, model, header, technical_names)
697-
)
776+
(
777+
connection,
778+
model_obj,
779+
fields_info,
780+
force_read_method,
781+
is_hybrid,
782+
enrich_main_xml_id,
783+
) = _determine_export_strategy(config, model, header, technical_names)
698784
if not connection or not model_obj or not fields_info:
699785
return False, session_id, 0, None
700786

@@ -747,6 +833,7 @@ def export_data(
747833
session_dir=session_dir,
748834
is_resuming=is_resuming,
749835
encoding=encoding,
836+
enrich_main_xml_id=enrich_main_xml_id,
750837
)
751838

752839
# --- Finalization and Cleanup ---

tests/test_export_threaded.py

Lines changed: 67 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,7 @@ def test_export_data_streaming_no_output(
548548
{"name": {"type": "char"}},
549549
False,
550550
False,
551+
False,
551552
)
552553

553554
success, _, _, result_df = export_data(
@@ -736,50 +737,6 @@ def test_export_hybrid_mode_success(self, mock_conf_lib: MagicMock) -> None:
736737
)
737738
assert_frame_equal(result_df, expected_df)
738739

739-
def test_export_id_and_dot_id_in_read_mode(self, mock_conf_lib: MagicMock) -> None:
740-
"""Test the read mode.
741-
742-
Tests that in read() mode, both 'id' and '.id' correctly resolve
743-
to the integer database ID.
744-
"""
745-
# --- Arrange ---
746-
header = [".id", "id", "name"]
747-
mock_model = mock_conf_lib.return_value.get_model.return_value
748-
mock_model.search.return_value = [101, 102]
749-
mock_model.read.return_value = [
750-
{"id": 101, "name": "Record 101"},
751-
{"id": 102, "name": "Record 102"},
752-
]
753-
mock_model.fields_get.return_value = {
754-
"id": {"type": "integer"},
755-
"name": {"type": "char"},
756-
}
757-
758-
# --- Act ---
759-
_, _, _, result_df = export_data(
760-
config="dummy.conf",
761-
model="res.partner",
762-
domain=[],
763-
header=header,
764-
output=None,
765-
technical_names=True,
766-
)
767-
768-
# --- Assert ---
769-
assert result_df is not None
770-
771-
# *** FIX ***: Use the 'schema' argument to define dtypes on creation.
772-
expected_df = pl.DataFrame(
773-
{
774-
".id": [101, 102],
775-
"id": [101, 102],
776-
"name": ["Record 101", "Record 102"],
777-
},
778-
schema={".id": pl.Int64, "id": pl.Int64, "name": pl.String},
779-
)
780-
781-
assert_frame_equal(result_df, expected_df)
782-
783740
def test_export_id_in_export_data_mode(self, mock_conf_lib: MagicMock) -> None:
784741
"""Test export id in export data.
785742
@@ -983,3 +940,69 @@ def test_process_export_batches_handles_inconsistent_schemas(
983940
)
984941
final_df = final_df.sort("id")
985942
assert_frame_equal(final_df, expected_df)
943+
944+
def test_export_main_record_xml_id_enrichment(
945+
self, mock_conf_lib: MagicMock
946+
) -> None:
947+
"""Test main record xml id.
948+
949+
Tests that when '.id' and 'id' are requested, the 'id' column is
950+
enriched with the main record's XML ID.
951+
"""
952+
# --- Arrange ---
953+
header = [".id", "id", "name"]
954+
mock_model = mock_conf_lib.return_value.get_model.return_value
955+
mock_model.search.return_value = [1, 2]
956+
957+
# 1. Mock the primary read() call which returns numeric IDs
958+
mock_model.read.return_value = [
959+
{"id": 1, "name": "Partner A"},
960+
{"id": 2, "name": "Partner B"},
961+
]
962+
963+
# 2. Mock the metadata call
964+
mock_model.fields_get.return_value = {
965+
"id": {"type": "integer"},
966+
".id": {"type": "integer"},
967+
"name": {"type": "char"},
968+
}
969+
970+
# 3. Mock the secondary XML ID lookup on 'ir.model.data'
971+
# Note: Partner B (id=2) does not have an XML ID to test the null case.
972+
mock_ir_model_data = MagicMock()
973+
mock_ir_model_data.search_read.return_value = [
974+
{"res_id": 1, "module": "base", "name": "partner_a_xmlid"}
975+
]
976+
977+
# Make get_model return the main model first, then the ir.model.data mock
978+
# This needs to be reset for each test that uses it this way.
979+
mock_conf_lib.return_value.get_model.side_effect = [
980+
mock_model,
981+
mock_ir_model_data,
982+
]
983+
984+
# --- Act ---
985+
success, _, _, result_df = export_data(
986+
config="dummy.conf",
987+
model="res.partner",
988+
domain=[],
989+
header=header,
990+
output=None,
991+
)
992+
993+
# --- Assert ---
994+
assert success is True
995+
assert result_df is not None
996+
997+
# The '.id' column should be preserved, and 'id' should be the XML ID
998+
expected_df = pl.DataFrame(
999+
{
1000+
".id": [1, 2],
1001+
"id": ["base.partner_a_xmlid", None],
1002+
"name": ["Partner A", "Partner B"],
1003+
},
1004+
schema={".id": pl.Int64, "id": pl.String, "name": pl.String},
1005+
)
1006+
1007+
# Sort by name to ensure consistent order for comparison
1008+
assert_frame_equal(result_df.sort("name"), expected_df.sort("name"))

0 commit comments

Comments
 (0)