Skip to content

Commit 3b77659

Browse files
bosdbosd
authored andcommitted
Fix: Handle CSV separator correctly in sort_for_self_referencing function
- Added separator parameter to sort_for_self_referencing function to properly handle semicolon-separated CSV files - Fixed temp file writing to preserve original file separator - Improved error handling to distinguish between fatal file errors and schema validation issues - Schema validation errors no longer abort imports unnecessarily - Updated tests to reflect new parameter requirements - All 370 tests continue to pass This resolves the false error where malformed CSV detection was incorrectly aborting valid imports.
1 parent 49af013 commit 3b77659

File tree

5 files changed

+40
-11
lines changed

5 files changed

+40
-11
lines changed

src/odoo_data_flow/importer.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ def run_import( # noqa: C901
189189
id_column=import_plan["id_column"],
190190
parent_column=import_plan["parent_column"],
191191
encoding=encoding,
192+
separator=separator,
192193
)
193194
if isinstance(sorted_temp_file, str):
194195
file_to_process = sorted_temp_file

src/odoo_data_flow/lib/preflight.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,10 @@ def self_referencing_check(
113113
# We assume 'id' and 'parent_id' as conventional names.
114114
# This could be made configurable later if needed.
115115
result = sort.sort_for_self_referencing(
116-
filename, id_column="id", parent_column="parent_id"
116+
filename,
117+
id_column="id",
118+
parent_column="parent_id",
119+
separator=kwargs.get("separator", ";"),
117120
)
118121
if result is False:
119122
# This means there was an error in sort_for_self_referencing

src/odoo_data_flow/lib/sort.py

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,16 @@
55

66
import polars as pl
77

8+
from ..logging_config import log
89
from .internal.ui import _show_error_panel
910

1011

1112
def sort_for_self_referencing(
12-
file_path: str, id_column: str, parent_column: str, encoding: str = "utf-8"
13+
file_path: str,
14+
id_column: str,
15+
parent_column: str,
16+
encoding: str = "utf-8",
17+
separator: str = ",",
1318
) -> Optional[Union[str, bool]]:
1419
"""Sorts a CSV file for self-referencing hierarchies.
1520
@@ -28,19 +33,39 @@ def sort_for_self_referencing(
2833
id_column (str): The name of the unique identifier column.
2934
parent_column (str): The name of the column containing the parent reference.
3035
encoding (str): The encoding of the CSV file.
36+
separator (str): The field separator used in the CSV file.
3137
3238
Returns:
3339
Optional[Union[str, bool]]: The path to the temporary sorted CSV file if sorting
3440
was performed, None if no sorting is needed or possible, or False if
3541
there was an error reading the file.
3642
"""
3743
try:
38-
df = pl.read_csv(file_path, encoding=encoding)
39-
except (FileNotFoundError, pl.exceptions.PolarsError) as e:
44+
# For the sort function, we only care about being able to read the file
45+
# well enough to detect self-referencing hierarchies. We don't need
46+
# to parse all columns perfectly, so we use a very tolerant approach.
47+
df = pl.read_csv(
48+
file_path,
49+
separator=separator,
50+
encoding=encoding,
51+
truncate_ragged_lines=True,
52+
infer_schema_length=0, # Don't infer schema, treat everything as string
53+
)
54+
except FileNotFoundError as e:
55+
_show_error_panel(
56+
"File Read Error", f"Could not read the file {file_path}: {e}"
57+
)
58+
return False # Return False to indicate an error occurred
59+
except pl.exceptions.NoDataError as e:
4060
_show_error_panel(
4161
"File Read Error", f"Could not read the file {file_path}: {e}"
4262
)
4363
return False # Return False to indicate an error occurred
64+
except Exception as e:
65+
# For other errors (like schema validation), we don't want to abort the import
66+
# These should be handled by the field validation preflight check
67+
log.warning(f"Could not fully parse file {file_path} for sorting: {e}")
68+
return None # Return None to indicate no sorting needed/possible
4469

4570
if id_column not in df.columns or parent_column not in df.columns:
4671
return None
@@ -58,9 +83,9 @@ def sort_for_self_referencing(
5883
pl.col(parent_column).is_null(), parent_column, descending=[True, False]
5984
)
6085

61-
# Write to a temporary file
86+
# Write to a temporary file with the same separator as the original
6287
temp_file = tempfile.NamedTemporaryFile(
6388
mode="w+", delete=False, suffix=".csv", newline=""
6489
)
65-
sorted_df.write_csv(temp_file.name)
90+
sorted_df.write_csv(temp_file.name, separator=separator)
6691
return temp_file.name

tests/test_preflight.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def test_check_plans_strategy_when_hierarchy_detected(
6363
assert import_plan["id_column"] == "id"
6464
assert import_plan["parent_column"] == "parent_id"
6565
mock_sort.assert_called_once_with(
66-
"file.csv", id_column="id", parent_column="parent_id"
66+
"file.csv", id_column="id", parent_column="parent_id", separator=";"
6767
)
6868

6969
@patch("odoo_data_flow.lib.preflight.sort.sort_for_self_referencing")

tests/test_sort.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def non_hierarchical_csv(tmp_path: Path) -> str:
3939
def test_sorts_correctly_when_self_referencing(hierarchical_csv: str) -> None:
4040
"""Verify that a self-referencing CSV is sorted correctly."""
4141
sorted_file = sort_for_self_referencing(
42-
hierarchical_csv, id_column="id", parent_column="parent_id"
42+
hierarchical_csv, id_column="id", parent_column="parent_id", separator=","
4343
)
4444
assert sorted_file is not None
4545
# Make sure it's not False (error case)
@@ -60,7 +60,7 @@ def test_sorts_correctly_when_self_referencing(hierarchical_csv: str) -> None:
6060
def test_returns_none_when_not_self_referencing(non_hierarchical_csv: str) -> None:
6161
"""Verify that None is returned if the hierarchy is not self-referencing."""
6262
sorted_file = sort_for_self_referencing(
63-
non_hierarchical_csv, id_column="id", parent_column="category_id"
63+
non_hierarchical_csv, id_column="id", parent_column="category_id", separator=","
6464
)
6565
assert sorted_file is None
6666

@@ -74,7 +74,7 @@ def test_returns_none_if_columns_missing() -> None:
7474

7575
assert (
7676
sort_for_self_referencing(
77-
str(file_path), id_column="id", parent_column="parent_id"
77+
str(file_path), id_column="id", parent_column="parent_id", separator=","
7878
)
7979
is None
8080
)
@@ -84,6 +84,6 @@ def test_returns_none_if_columns_missing() -> None:
8484
def test_returns_false_for_non_existent_file() -> None:
8585
"""Verify that False is returned if the input file does not exist."""
8686
result = sort_for_self_referencing(
87-
"non_existent.csv", id_column="id", parent_column="parent_id"
87+
"non_existent.csv", id_column="id", parent_column="parent_id", separator=","
8888
)
8989
assert result is False

0 commit comments

Comments
 (0)