Skip to content

Commit c630f60

Browse files
Feat: Add CLI (#70)
1 parent 0a2bf7f commit c630f60

File tree

9 files changed

+650
-30
lines changed

9 files changed

+650
-30
lines changed

dir_content_diff/__init__.py

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import re
1818
from pathlib import Path
1919

20+
from dir_content_diff.base_comparators import BaseComparator
2021
from dir_content_diff.base_comparators import DefaultComparator
2122
from dir_content_diff.base_comparators import IniComparator
2223
from dir_content_diff.base_comparators import JsonComparator
@@ -29,7 +30,6 @@
2930

3031
__version__ = importlib.metadata.version("dir-content-diff")
3132

32-
3333
_DEFAULT_COMPARATORS = {
3434
None: DefaultComparator(),
3535
".cfg": IniComparator(), # luigi config files
@@ -224,12 +224,34 @@ def export_formatted_file(file, formatted_file, comparator, **kwargs):
224224
),
225225
)
226226
else:
227-
LOGGER.info(
227+
LOGGER.debug(
228228
"Skip formatting for '%s' because the comparator has no saving capability.",
229229
file,
230230
)
231231

232232

233+
def pick_comparator(comparator=None, suffix=None, comparators=None):
    """Select the comparator to use, from an instance, a class name or a file suffix.

    The lookup proceeds in this order and stops at the first hit:

    1. ``comparator`` is already a :class:`BaseComparator` instance: it is returned as is.
    2. ``comparator`` is not ``None``: the first registered comparator whose class name
       equals ``comparator`` is returned.
    3. ``suffix`` is not ``None`` and is a key of the registry: the associated comparator is
       returned.
    4. Otherwise the entry stored under the ``None`` key of ``_COMPARATORS`` is returned.

    Args:
        comparator: A comparator instance, the class name of a registered comparator, or
            ``None``.
        suffix: The file suffix used as key in the registry, or ``None``.
        comparators (dict): The registry to search. If ``None``, the result of
            :func:`get_comparators` is used.

    Returns:
        The selected comparator.
    """
    # A ready-to-use comparator object always takes precedence.
    if isinstance(comparator, BaseComparator):
        return comparator

    registry = get_comparators() if comparators is None else comparators

    # Lookup by class name.
    if comparator is not None:
        for candidate in registry.values():  # pragma: no branch
            if candidate.__class__.__name__ == comparator:
                return candidate
        LOGGER.debug(
            "Could not find the comparator named '%s' in the given comparators",
            comparator,
        )

    # Lookup by file suffix; the sentinel distinguishes a missing key from a stored None.
    if suffix is not None:
        missing = object()
        by_suffix = registry.get(suffix, missing)
        if by_suffix is not missing:
            return by_suffix
        LOGGER.debug("Could not find the comparator for the '%s' suffix", suffix)

    # Nothing matched: fall back to the entry registered under the None key.
    LOGGER.debug("Returning the default comparator")
    return _COMPARATORS.get(None)
253+
254+
233255
def compare_trees(
234256
ref_path,
235257
comp_path,
@@ -290,9 +312,6 @@ def compare_trees(
290312
difference messages. If the directories are considered as equal, an empty ``dict`` is
291313
returned.
292314
"""
293-
if comparators is None:
294-
comparators = _COMPARATORS
295-
296315
ref_path = Path(ref_path)
297316
comp_path = Path(comp_path)
298317
formatted_data_path = comp_path.with_name(
@@ -332,12 +351,10 @@ def compare_trees(
332351
break
333352
if specific_file_args is None:
334353
specific_file_args = {}
335-
comparator = specific_file_args.pop(
336-
"comparator",
337-
comparators.get(
338-
ref_file.suffix,
339-
_COMPARATORS.get(None),
340-
),
354+
comparator = pick_comparator(
355+
comparator=specific_file_args.pop("comparator", None),
356+
suffix=ref_file.suffix,
357+
comparators=comparators,
341358
)
342359
comparator_args = specific_file_args.pop("args", [])
343360
res = compare_files(

dir_content_diff/base_comparators.py

Lines changed: 90 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,11 @@
1919
from xml.etree import ElementTree
2020

2121
import dictdiffer
22+
import diff_pdf_visually
2223
import jsonpath_ng
2324
import yaml
2425
from dicttoxml import dicttoxml
25-
from diff_pdf_visually import pdf_similar
26+
from diff_pdf_visually import pdfdiff_pages
2627

2728
from dir_content_diff.util import diff_msg_formatter
2829

@@ -618,35 +619,114 @@ def diff(self, ref, comp, *args, **kwargs):
618619
619620
Keyword Args:
620621
threshold (int): The threshold used to compare the images.
621-
tempdir (pathlib.Path): Empty directory where the temporary images will be exported.
622+
tempdir (pathlib.Path): Directory in which a new ``dir-diff`` directory will be created
623+
to export the debug images.
622624
dpi (int): The resolution used to convert the PDF files into images.
623625
verbosity (int): The log verbosity.
624626
max_report_pagenos (int): Only this number of the different pages will be logged (only
625627
used if the verbosity is greater than 1).
626628
num_threads (int): If set to 2 (the default), the image conversion are processed in
627629
parallel. If set to 1 it is processed sequentially.
628630
"""
631+
res = pdfdiff_pages(ref, comp, *args, **kwargs)
632+
if not res:
633+
return False
634+
return res
635+
636+
def __call__(self, ref_file, comp_file, *args, **kwargs):
    """Prepare the export directory and the verbosity before calling the parent ``__call__``.

    If a ``tempdir`` keyword argument is given, a new directory is created inside it and
    passed on as ``tempdir`` instead. Its path is ``<tempdir>/diff-pdf/<common trailing
    parts of ref_file and comp_file>``, or ``<tempdir>/diff-pdf/<comp_file.name>`` when the
    two paths share no trailing part. When ``<tempdir>/diff-pdf`` already exists, the
    suffixes ``_1``, ``_2``, ... are tried until an unused name is found.

    If no ``verbosity`` keyword argument is given, the verbosity is forced to 0 for the
    duration of the call. ``diff_pdf_visually.constants.DEFAULT_VERBOSITY`` is always
    restored to its initial value before returning, even if the comparison raises.

    Args:
        ref_file (pathlib.Path): The path to the reference file.
        comp_file (pathlib.Path): The path to the compared file.
        *args: Positional arguments forwarded to the parent ``__call__``.
        **kwargs: Keyword arguments forwarded to the parent ``__call__`` (``tempdir`` and
            ``verbosity`` are processed as described above).

    Returns:
        The value returned by the parent ``__call__``.
    """
    tempdir = kwargs.pop("tempdir", None)
    if tempdir is not None:
        # Collect the trailing path components shared by both files (deepest first).
        relative_parts = []
        for i, j in zip(
            ref_file.parts[::-1], comp_file.parts[::-1]
        ):  # pragma: no branch
            if i != j:
                break
            relative_parts.append(i)
        # Identical absolute paths share everything up to the root: drop the root so that
        # joining below does not escape from tempdir.
        if relative_parts and relative_parts[-1] == Path(tempdir).root:
            relative_parts.pop()
        if not relative_parts:
            relative_parts.append(comp_file.name)
        # The list is reversed when joined, so the last element is the top-level directory.
        relative_parts.append("diff-pdf")
        new_tempdir = Path(tempdir) / Path(*relative_parts[::-1])

        # Deduplicate name if needed
        last_part = str(relative_parts[-1])
        num = 1
        while True:
            root = Path(tempdir) / relative_parts[-1]
            if not root.exists():
                new_tempdir.mkdir(parents=True, exist_ok=False)
                break
            relative_parts[-1] = last_part + f"_{num}"
            new_tempdir = Path(tempdir) / Path(*relative_parts[::-1])
            num += 1

        kwargs["tempdir"] = new_tempdir

    # Save the library-wide default unconditionally so that the ``finally`` clause below
    # never refers to an unbound name, whatever the keyword arguments are.
    initial_default_verbosity = diff_pdf_visually.constants.DEFAULT_VERBOSITY
    try:
        if "verbosity" not in kwargs:
            # NOTE(review): presumably a ``None`` default means that this version of
            # diff_pdf_visually reads the verbosity from its module constant at call time
            # -- confirm against the supported versions.
            if (
                diff_pdf_visually.diff.pdfdiff_pages.__defaults__[1] is None
            ):  # pragma: no cover
                # The override must stay in place while the comparison runs.
                diff_pdf_visually.constants.DEFAULT_VERBOSITY = 0
            else:
                kwargs["verbosity"] = 0
        return super().__call__(ref_file, comp_file, *args, **kwargs)
    finally:
        diff_pdf_visually.constants.DEFAULT_VERBOSITY = initial_default_verbosity
688+
689+
def report(
    self,
    ref_file,
    comp_file,
    formatted_differences,
    diff_args,
    diff_kwargs,
    load_kwargs=None,
    format_data_kwargs=None,
    filter_kwargs=None,
    format_diff_kwargs=None,
    sort_kwargs=None,
    concat_kwargs=None,
    **kwargs,
):  # pylint: disable=too-many-arguments
    """Add PDF-specific information to the differences before calling the parent ``report``.

    When ``formatted_differences`` is a non-empty string, its lines are joined with ``", "``
    and prefixed with a sentence introducing the most different pages. When it is a string
    and ``diff_kwargs`` contains a ``tempdir`` entry, a line giving the location of the
    visual differences is appended. Values that are not strings are forwarded untouched.

    Args:
        ref_file: The reference file, forwarded to the parent ``report``.
        comp_file: The compared file, forwarded to the parent ``report``.
        formatted_differences: The differences to report, usually a string.
        diff_args: The positional arguments that were used for the diff, forwarded as is.
        diff_kwargs (dict): The keyword arguments that were used for the diff; only the
            ``tempdir`` entry is read here.
        load_kwargs: Forwarded to the parent ``report``.
        format_data_kwargs: Forwarded to the parent ``report``.
        filter_kwargs: Forwarded to the parent ``report``.
        format_diff_kwargs: Forwarded to the parent ``report``.
        sort_kwargs: Forwarded to the parent ``report``.
        concat_kwargs: Forwarded to the parent ``report``.
        **kwargs: Forwarded to the parent ``report``.

    Returns:
        The value returned by the parent ``report``.
    """
    # Only strings can be decorated: ``+=`` on anything else would either raise or, for a
    # list, silently extend it character by character.
    if isinstance(formatted_differences, str):
        if formatted_differences:
            formatted_differences = (
                "The following pages are the most different: "
                + formatted_differences.replace("\n", ", ")
            )
        if diff_kwargs and "tempdir" in diff_kwargs:
            formatted_differences += (
                "\nThe visual differences can be found here: "
                + str(diff_kwargs["tempdir"])
            )
    return super().report(
        ref_file,
        comp_file,
        formatted_differences,
        diff_args,
        diff_kwargs,
        load_kwargs=load_kwargs,
        format_data_kwargs=format_data_kwargs,
        filter_kwargs=filter_kwargs,
        format_diff_kwargs=format_diff_kwargs,
        sort_kwargs=sort_kwargs,
        concat_kwargs=concat_kwargs,
        **kwargs,
    )
729+
730+
def format_diff(self, difference, **kwargs):
    """Return the string representation of one difference element.

    Args:
        difference: One element of the differences.
        **kwargs: Accepted but not used by this method.

    Returns:
        str: The result of converting ``difference`` to a string.
    """
    text = str(difference)
    return text

0 commit comments

Comments
 (0)