Skip to content

Commit b1d0cea

Browse files
authored
Fix --ignore-merge-errors (#250)
* pass kwargs, rework strict_merge
* whoops.
* Docs.
1 parent ddb8659 commit b1d0cea

File tree

7 files changed

+27
-14
lines changed

7 files changed

+27
-14
lines changed

docs/source/version.7_0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,5 @@ Breaking changes in ``yadg-next`` are:
2424
Bug fixes in ``yadg-next`` include:
2525

2626
- The parameter ``Set I/C`` in :mod:`yadg.extractors.eclab.mpr` files should be ``C / N`` when set to 1, not ``C``.
27+
- The command line argument ``--ignore-merge-errors`` was not being passed to the individual extractors when using the ``yadg extract`` subcommand.
2728

src/yadg/core.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,6 @@ def process_schema(dataschema: DataSchema, strict_merge: bool = False) -> DataTr
2828
has to be identical. Defaults to ``False`` which means conflicts will be dropped.
2929
3030
"""
31-
if strict_merge:
32-
concatmode = "identical"
33-
else:
34-
concatmode = "drop_conflicts"
3531

3632
while hasattr(dataschema, "update"):
3733
dataschema = dataschema.update()
@@ -85,7 +81,7 @@ def process_schema(dataschema: DataSchema, strict_merge: bool = False) -> DataTr
8581
if k in fvals[name].attrs:
8682
del fvals[name].attrs[k]
8783

88-
vals = dgutils.merge_dicttrees(vals, fvals, concatmode)
84+
vals = dgutils.merge_dicttrees(vals, fvals, strict_merge)
8985

9086
stepdt = DataTree.from_dict({} if vals is None else vals)
9187
stepdt.name = step.tag

src/yadg/dgutils/dsutils.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ def dicts_to_dataset(
7474
return xr.Dataset(data_vars=darrs, coords=coords, attrs=attrs)
7575

7676

77-
def merge_dicttrees(vals: dict, fvals: dict, mode: str) -> dict:
77+
def merge_dicttrees(vals: dict, fvals: dict, strict_merge: bool) -> dict:
7878
"""
7979
A helper function that merges two ``DataTree.to_dict()`` objects by concatenating
8080
the new values in ``fvals`` to the existing ones in ``vals``.
@@ -89,11 +89,11 @@ def merge_dicttrees(vals: dict, fvals: dict, mode: str) -> dict:
8989
[vals[k], fvals[k]],
9090
dim="uts",
9191
data_vars="different",
92-
compat="identical",
92+
compat="identical" if strict_merge else "equals",
9393
join="outer",
94-
combine_attrs=mode,
94+
combine_attrs="identical" if strict_merge else "drop_conflicts",
9595
)
96-
except xr.MergeError:
96+
except (xr.MergeError, ValueError) as e:
9797
raise RuntimeError(
9898
"Merging metadata from multiple files has failed, as some of the "
9999
"values differ between files. This might be caused by trying to "
@@ -102,7 +102,7 @@ def merge_dicttrees(vals: dict, fvals: dict, mode: str) -> dict:
102102
"yadg with the '--ignore-merge-errors' option."
103103
f"\n{vals[k].attrs=}"
104104
f"\n{fvals[k].attrs=}"
105-
)
105+
) from e
106106
return vals
107107

108108

src/yadg/extractors/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,13 +69,14 @@ def extract(
6969
}
7070
).extractor
7171

72-
return extract_from_path(Path(path), extractor)
72+
return extract_from_path(Path(path), extractor, **kwargs)
7373

7474

7575
@deprecate_fn_path
7676
def extract_from_path(
7777
source: Path,
7878
extractor: FileType,
79+
**kwargs: dict,
7980
) -> DataTree:
8081
"""
8182
Extracts data and metadata from the provided path using the supplied extractor.
@@ -100,7 +101,7 @@ def extract_from_path(
100101
func = getattr(m, "extract")
101102

102103
# Func should always return a xarray.DataTree
103-
ret: DataTree = func(source=source, **vars(extractor))
104+
ret: DataTree = func(source=source, **vars(extractor), **kwargs)
104105
jsonize_orig_meta(ret)
105106

106107
ret.attrs.update(
@@ -119,6 +120,7 @@ def extract_from_path(
119120
def extract_from_bytes(
120121
source: bytes,
121122
extractor: FileType,
123+
**kwargs: dict,
122124
) -> DataTree:
123125
"""
124126
Extracts data and metadata from the provided raw bytes using the supplied extractor.

src/yadg/extractors/agilent/dx.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,5 +68,5 @@ def extract_from_path(
6868
for ffn in sorted(filenames):
6969
path = Path(tempdir) / ffn
7070
fdt = extract_ch(source=path, timezone=timezone, **kwargs)
71-
dt = dgutils.merge_dicttrees(dt, fdt.to_dict(), "identical")
71+
dt = dgutils.merge_dicttrees(dt, fdt.to_dict(), True)
7272
return DataTree.from_dict(dt)

src/yadg/extractors/fusion/zip.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,10 @@
5050
from yadg import dgutils
5151
from pathlib import Path
5252
from yadg.extractors import get_extract_dispatch
53+
import logging
5354

5455
extract = get_extract_dispatch()
56+
logger = logging.getLogger(__name__)
5557

5658

5759
@extract.register(Path)
@@ -63,14 +65,19 @@ def extract_from_path(
6365
**kwargs: dict,
6466
) -> DataTree:
6567
zf = zipfile.ZipFile(source)
68+
strict_merge = not kwargs.get("ignore_merge_errors", False)
69+
if strict_merge is False:
70+
logger.info("Will drop metadata conflicts in individual fusion-data files.")
71+
6672
with tempfile.TemporaryDirectory() as tempdir:
6773
zf.extractall(tempdir)
6874
dt = None
6975
filenames = [ffn for ffn in os.listdir(tempdir) if ffn.endswith("fusion-data")]
7076
for ffn in sorted(filenames):
77+
logger.debug("Processing filename '%s'", ffn)
7178
path = Path(tempdir) / ffn
7279
fdt = extract_json(
7380
source=path, timezone=timezone, encoding=encoding, **kwargs
7481
)
75-
dt = dgutils.merge_dicttrees(dt, fdt.to_dict(), "identical")
82+
dt = dgutils.merge_dicttrees(dt, fdt.to_dict(), strict_merge)
7683
return DataTree.from_dict(dt)

src/yadg/main.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,13 @@ def run_with_arguments():
177177
default=None,
178178
type=str,
179179
)
180+
extract.add_argument(
181+
"--ignore-merge-errors",
182+
dest="ignore_merge_errors",
183+
action="store_true",
184+
help="Ignore metadata merge errors while processing multiple files in a step.",
185+
default=False,
186+
)
180187
extract.set_defaults(func=subcommands.extract)
181188

182189
# parse subparser args

0 commit comments

Comments
 (0)