Skip to content

Commit 88c9494

Browse files
Feat: Add CLI
1 parent 0a2bf7f commit 88c9494

File tree

9 files changed

+451
-29
lines changed

9 files changed

+451
-29
lines changed

dir_content_diff/__init__.py

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import re
1818
from pathlib import Path
1919

20+
from dir_content_diff.base_comparators import BaseComparator
2021
from dir_content_diff.base_comparators import DefaultComparator
2122
from dir_content_diff.base_comparators import IniComparator
2223
from dir_content_diff.base_comparators import JsonComparator
@@ -29,7 +30,6 @@
2930

3031
__version__ = importlib.metadata.version("dir-content-diff")
3132

32-
3333
_DEFAULT_COMPARATORS = {
3434
None: DefaultComparator(),
3535
".cfg": IniComparator(), # luigi config files
@@ -230,6 +230,28 @@ def export_formatted_file(file, formatted_file, comparator, **kwargs):
230230
)
231231

232232

233+
def pick_comparator(comparator=None, suffix=None, comparators=None):
    """Select the comparator to use, from an instance, a class name or a file suffix.

    The lookup is performed in this order:

    1. if ``comparator`` is already a :class:`BaseComparator` instance, it is returned as is;
    2. if ``comparator`` is given, the first registered comparator whose class name is equal
       to it is returned;
    3. if ``suffix`` is given and is a key of the registered comparators, the associated
       comparator is returned;
    4. otherwise the entry stored under the ``None`` key of ``_COMPARATORS`` is returned.

    Args:
        comparator: A comparator instance or the class name of a registered comparator.
        suffix: The file suffix used as key in the comparator mapping.
        comparators: The mapping in which the comparator is searched. If ``None``, the mapping
            returned by ``get_comparators()`` is used.

    Returns:
        The selected comparator.
    """
    # A ready-to-use comparator object does not need any lookup.
    if isinstance(comparator, BaseComparator):
        return comparator

    registry = get_comparators() if comparators is None else comparators

    # Lookup by class name.
    if comparator is not None:
        for candidate in registry.values():  # pragma: no branch
            if candidate.__class__.__name__ == comparator:
                return candidate
        LOGGER.debug(
            "Could not find the comparator named '%s' in the given comparators",
            comparator,
        )

    # Lookup by file suffix.
    if suffix is not None:
        if suffix in registry:
            return registry.get(suffix)
        LOGGER.debug("Could not find the comparator for the '%s' suffix", suffix)

    # Nothing matched: fall back to the default entry of the module-level registry.
    LOGGER.debug("Returning the default comparator")
    return _COMPARATORS.get(None)
253+
254+
233255
def compare_trees(
234256
ref_path,
235257
comp_path,
@@ -290,9 +312,6 @@ def compare_trees(
290312
difference messages. If the directories are considered as equal, an empty ``dict`` is
291313
returned.
292314
"""
293-
if comparators is None:
294-
comparators = _COMPARATORS
295-
296315
ref_path = Path(ref_path)
297316
comp_path = Path(comp_path)
298317
formatted_data_path = comp_path.with_name(
@@ -332,12 +351,10 @@ def compare_trees(
332351
break
333352
if specific_file_args is None:
334353
specific_file_args = {}
335-
comparator = specific_file_args.pop(
336-
"comparator",
337-
comparators.get(
338-
ref_file.suffix,
339-
_COMPARATORS.get(None),
340-
),
354+
comparator = pick_comparator(
355+
comparator=specific_file_args.pop("comparator", None),
356+
suffix=ref_file.suffix,
357+
comparators=comparators,
341358
)
342359
comparator_args = specific_file_args.pop("args", [])
343360
res = compare_files(

dir_content_diff/base_comparators.py

Lines changed: 90 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,11 @@
1919
from xml.etree import ElementTree
2020

2121
import dictdiffer
22+
import diff_pdf_visually
2223
import jsonpath_ng
2324
import yaml
2425
from dicttoxml import dicttoxml
25-
from diff_pdf_visually import pdf_similar
26+
from diff_pdf_visually import pdfdiff_pages
2627

2728
from dir_content_diff.util import diff_msg_formatter
2829

@@ -618,35 +619,114 @@ def diff(self, ref, comp, *args, **kwargs):
618619
619620
Keyword Args:
620621
threshold (int): The threshold used to compare the images.
621-
tempdir (pathlib.Path): Empty directory where the temporary images will be exported.
622+
tempdir (pathlib.Path): Directory in which a new ``diff-pdf`` directory will be created
623+
to export the debug images.
622624
dpi (int): The resolution used to convert the PDF files into images.
623625
verbosity (int): The log verbosity.
624626
max_report_pagenos (int): Only this number of the different pages will be logged (only
625627
used if the verbosity is greater than 1).
626628
num_threads (int): If set to 2 (the default), the image conversion are processed in
627629
parallel. If set to 1 it is processed sequentially.
628630
"""
631+
res = pdfdiff_pages(ref, comp, *args, **kwargs)
632+
if not res:
633+
return False
634+
else:
635+
return res
636+
637+
def __call__(self, ref_file, comp_file, *args, **kwargs):
    """Process arguments before calling the diff method.

    Two keyword arguments are pre-processed before delegating to the parent ``__call__``:

    * ``tempdir``: when given, a new sub-directory is created inside it and passed on as the
      actual ``tempdir``. Its path is ``<tempdir>/diff-pdf/<common trailing path parts>``,
      and ``diff-pdf`` is suffixed with ``_1``, ``_2``, ... if it already exists.
    * ``verbosity``: when not given, the verbosity is forced to 0 for the duration of the
      call, either through the keyword argument or through the default constant of
      ``diff_pdf_visually``.

    Args:
        ref_file: The reference file (must expose ``parts``, e.g. a :class:`pathlib.Path`,
            when ``tempdir`` is given).
        comp_file: The compared file (same requirement as ``ref_file``).
        *args: Forwarded untouched to the parent ``__call__``.
        **kwargs: Forwarded to the parent ``__call__`` after the processing described above.

    Returns:
        The value returned by the parent ``__call__``.
    """
    tempdir = kwargs.pop("tempdir", None)
    if tempdir is not None:
        # Collect the trailing path components shared by the two files (deepest first).
        relative_parts = []
        for i, j in zip(
            ref_file.parts[::-1], comp_file.parts[::-1]
        ):  # pragma: no branch
            if i != j:
                break
            relative_parts.append(i)
        # If the two paths are fully identical, the last collected part is the root: drop it
        # so that the result remains relative to ``tempdir``.
        if relative_parts and relative_parts[-1] == Path(tempdir).root:
            relative_parts.pop()
        if not relative_parts:
            relative_parts.append(comp_file.name)
        # The parts are reversed below, so this becomes the top-level directory.
        relative_parts.append("diff-pdf")
        new_tempdir = Path(tempdir) / Path(*relative_parts[::-1])

        # Deduplicate name if needed
        last_part = str(relative_parts[-1])
        num = 1
        while True:
            root = Path(tempdir) / relative_parts[-1]
            if not root.exists():
                new_tempdir.mkdir(parents=True, exist_ok=False)
                break
            relative_parts[-1] = last_part + f"_{num}"
            new_tempdir = Path(tempdir) / Path(*relative_parts[::-1])
            num += 1

        kwargs["tempdir"] = new_tempdir

    # Save the library default unconditionally: the ``finally`` clause below must be able to
    # restore it even when the caller passed an explicit ``verbosity``.
    saved_default_verbosity = diff_pdf_visually.constants.DEFAULT_VERBOSITY
    try:
        # Update default verbosity
        if "verbosity" not in kwargs:
            if diff_pdf_visually.diff.pdfdiff_pages.__defaults__[1] is None:
                # NOTE(review): a ``None`` default presumably means that the library reads
                # ``DEFAULT_VERBOSITY`` at call time - confirm against diff_pdf_visually.
                # The constant must stay at 0 until the parent call below has returned.
                diff_pdf_visually.constants.DEFAULT_VERBOSITY = 0
            else:
                kwargs["verbosity"] = 0
        return super().__call__(ref_file, comp_file, *args, **kwargs)
    finally:
        diff_pdf_visually.constants.DEFAULT_VERBOSITY = saved_default_verbosity
687+
688+
def report(
    self,
    ref_file,
    comp_file,
    formatted_differences,
    diff_args,
    diff_kwargs,
    load_kwargs=None,
    format_data_kwargs=None,
    filter_kwargs=None,
    format_diff_kwargs=None,
    sort_kwargs=None,
    concat_kwargs=None,
    **kwargs,
):  # pylint: disable=too-many-arguments
    """Add specific information before calling the default method.

    When ``formatted_differences`` is a non-empty string, it is rewritten as a one-line list
    of pages (new lines are replaced by ``", "``) and, if ``diff_kwargs`` contains a
    ``tempdir`` entry, the location of the exported visual differences is appended.
    Any other value is forwarded untouched to the parent ``report``.

    Args:
        ref_file: The reference file.
        comp_file: The compared file.
        formatted_differences: The differences already formatted by the comparator.
        diff_args: The positional arguments that were passed to the diff method.
        diff_kwargs: The keyword arguments that were passed to the diff method.
        load_kwargs: Forwarded to the parent ``report``.
        format_data_kwargs: Forwarded to the parent ``report``.
        filter_kwargs: Forwarded to the parent ``report``.
        format_diff_kwargs: Forwarded to the parent ``report``.
        sort_kwargs: Forwarded to the parent ``report``.
        concat_kwargs: Forwarded to the parent ``report``.
        **kwargs: Forwarded to the parent ``report``.

    Returns:
        The value returned by the parent ``report``.
    """
    # Text can only be appended to a string: guard the whole decoration with the type check
    # so that a non-string truthy value is never combined with ``+=``.
    if formatted_differences and isinstance(formatted_differences, str):
        formatted_differences = (
            "The following pages are the most different: "
            + formatted_differences.replace("\n", ", ")
        )
        if "tempdir" in diff_kwargs:
            formatted_differences += (
                "\nThe visual differences can be found here: "
                + str(diff_kwargs["tempdir"])
            )
    return super().report(
        ref_file,
        comp_file,
        formatted_differences,
        diff_args,
        diff_kwargs,
        load_kwargs=load_kwargs,
        format_data_kwargs=format_data_kwargs,
        filter_kwargs=filter_kwargs,
        format_diff_kwargs=format_diff_kwargs,
        sort_kwargs=sort_kwargs,
        concat_kwargs=concat_kwargs,
        **kwargs,
    )
729+
730+
def format_diff(self, difference, **kwargs):
    """Return the text representation of one element difference.

    Args:
        difference: The element to format.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        str: The result of ``str(difference)``.
    """
    formatted = str(difference)
    return formatted

dir_content_diff/cli/__init__.py

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
"""Main entry point of the Command Line Interface for the dir-content-diff package."""
2+
3+
# LICENSE HEADER MANAGED BY add-license-header
4+
# Copyright (c) 2023-2024 Blue Brain Project, EPFL.
5+
#
6+
# This file is part of dir-content-diff.
7+
# See https://github.com/BlueBrain/dir-content-diff for further info.
8+
#
9+
# SPDX-License-Identifier: Apache-2.0
10+
# LICENSE HEADER MANAGED BY add-license-header
11+
12+
import json
13+
import logging
14+
import sys
15+
from pathlib import Path
16+
17+
import click
18+
from yaml import safe_load
19+
20+
from dir_content_diff import compare_files
21+
from dir_content_diff import compare_trees
22+
from dir_content_diff import export_formatted_file
23+
from dir_content_diff import pick_comparator
24+
from dir_content_diff.util import LOGGER
25+
26+
27+
def setup_logger(level: str = "info"):
    """Configure the root logger through :func:`logging.basicConfig`.

    Args:
        level: The name of the logging level (case-insensitive). It must be one of ``debug``,
            ``info``, ``warning``, ``error`` or ``critical``.

    Raises:
        KeyError: If the given level name is not one of the supported names.
    """
    name_to_level = {
        "debug": logging.DEBUG,
        "info": logging.INFO,
        "warning": logging.WARNING,
        "error": logging.ERROR,
        "critical": logging.CRITICAL,
    }
    numeric_level = name_to_level[level.lower()]
    logging.basicConfig(format="%(levelname)s - %(message)s", level=numeric_level)
43+
44+
45+
def load_config(ctx, param, value):
    """Load the configuration from a JSON string or from a YAML file path.

    This function has the signature of a click option callback. The result is stored in
    ``ctx.config``, which is always set: it is an empty ``dict`` when no value is given or
    when the loaded document is empty (JSON ``null`` or empty YAML file).

    Args:
        ctx: The context object on which the ``config`` attribute is set.
        param: The parameter that triggered the callback (not used).
        value: A JSON string or the path to a YAML file, or ``None``.

    Raises:
        FileNotFoundError: If the value is not valid JSON and is not the path of an existing
            file. The JSON parsing error is attached as the cause.
    """
    ctx.config = {}
    if value is None:
        return

    try:
        loaded = json.loads(value)
    except ValueError as json_exc:
        # Not a JSON document (``json.JSONDecodeError`` is a ``ValueError``): the value is
        # then interpreted as the path to a YAML file.
        path = Path(value)
        if not path.exists():
            msg = f"The file '{path}' does not exist."
            raise FileNotFoundError(msg) from json_exc
        try:
            with path.open() as f:
                loaded = safe_load(f.read())
        except Exception as path_exc:
            # Keep the JSON error in the chain so that both failures are reported.
            raise path_exc from json_exc

    # An empty document is loaded as ``None``: keep the empty ``dict`` in this case so that
    # the consumers of ``ctx.config`` can always treat it as a mapping.
    if loaded is not None:
        ctx.config = loaded
61+
62+
63+
@click.command(
    short_help="Compare the two given inputs",
    epilog=(
        "Note: When comparing directories, only the files located in the REFERENCE_INPUT will be "
        "considered in the COMPARED_INPUT."
    ),
)
@click.argument("reference_input", type=click.Path(dir_okay=True, exists=True))
@click.argument("compared_input", type=click.Path(dir_okay=True, exists=True))
@click.option(
    "-c",
    "--config",
    callback=load_config,
    is_eager=True,
    expose_value=False,
    show_default=True,
    help="Read option defaults from the given JSON string or the specified YAML file.",
)
@click.option(
    "--log-level",
    type=click.Choice(["debug", "info", "warning", "error", "critical"]),
    default="info",
    help="The logger level.",
)
@click.option(
    "-f/-nf",
    "--export-formatted-files/--no-export-formatted-files",
    default=False,
    help="Export the files after they were formatted by the comparators.",
)
@click.option(
    "-s/-ns",
    "--sort-diffs/--no-sort-diffs",
    default=False,
    help="Sort the differences by file name.",
)
@click.version_option()
@click.pass_context
def main(ctx, *args, **kwargs):
    """A command line tool for directory or file comparison.

    REFERENCE_INPUT is the file or directory considered as the reference for comparison.

    COMPARED_INPUT is the file or directory considered as the compared input.
    """
    # The logger must be configured before the first message is emitted.
    setup_logger(kwargs.pop("log_level", "info"))

    LOGGER.debug("Running the following command: %s", " ".join(sys.argv))
    LOGGER.debug("Running from the following folder: %s", Path.cwd())

    # Extract the CLI values; ``ctx.config`` is set by the ``--config`` callback.
    reference = Path(kwargs.pop("reference_input"))
    compared = Path(kwargs.pop("compared_input"))
    export_formatted = kwargs.pop("export_formatted_files", False)
    sort_diffs = kwargs.pop("sort_diffs", False)

    input_diff(reference, compared, ctx.config, export_formatted, sort_diffs)
124+
125+
126+
def input_diff(ref, comp, config, export_formatted_files=False, sort_diffs=False):
    """Compute and display differences from given inputs.

    The two inputs must be either two directories (compared with ``compare_trees``) or two
    files (compared with ``compare_files``). The result is only logged, nothing is returned.

    Args:
        ref: The path to the reference file or directory.
        comp: The path to the compared file or directory.
        config (dict): The comparison configuration. For directories it is passed as
            ``specific_args`` to ``compare_trees``. For files, its ``comparator`` entry (if
            any) is removed and used to pick the comparator, and the remaining entries are
            passed as keyword arguments to ``compare_files``.
        export_formatted_files (bool): If ``True``, the formatted files are exported.
        sort_diffs (bool): If ``True``, the differences are reported sorted by key.

    Raises:
        ValueError: If one input is a directory and the other one is not.
    """
    ref = Path(ref)
    comp = Path(comp)
    ref_is_dir = ref.is_dir()
    if ref_is_dir != comp.is_dir():
        msg = "The reference and compared inputs must both be either two directories or two files."
        raise ValueError(msg)

    if ref_is_dir:
        res = compare_trees(
            ref,
            comp,
            specific_args=config,
            export_formatted_files=export_formatted_files,
        )
    else:
        comparator_name = config.pop("comparator", None)
        comparator = pick_comparator(
            comparator=comparator_name,
            suffix=ref.suffix,
        )
        file_diff = compare_files(ref, comp, comparator, **config)
        # A falsy result means that the files are equal: it must not be stored, otherwise the
        # non-empty mapping would be reported as a difference.
        res = {str(ref): file_diff} if file_diff else {}
        if export_formatted_files:
            # NOTE(review): ``export_formatted_file`` is declared with the signature
            # ``(file, formatted_file, comparator, **kwargs)`` while no destination and no
            # comparator are passed here - confirm the intended arguments.
            export_formatted_file(ref, **config)
            export_formatted_file(comp, **config)

    if res:
        # Work on the (key, message) pairs so that the sorted and unsorted cases are handled
        # the same way when the messages are joined.
        entries = res.items()
        if sort_diffs:
            entries = sorted(entries, key=lambda entry: entry[0])
        LOGGER.info(
            "Differences found between '%s' and '%s':\n\n\n%s",
            ref,
            comp,
            "\n\n\n".join(message for _, message in entries),
        )
    else:
        LOGGER.info("No difference found between '%s' and '%s'", ref, comp)

pyproject.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ classifiers = [
2121
]
2222
dynamic = ["version", "optional-dependencies"]
2323
dependencies = [
24+
"click>=8",
2425
"dictdiffer>=0.8",
2526
"dicttoxml>=1.7.12",
2627
"diff_pdf_visually>=1.7",
@@ -34,6 +35,9 @@ Homepage = "https://dir-content-diff.readthedocs.io"
3435
Repository = "https://github.com/BlueBrain/dir-content-diff"
3536
Tracker = "https://github.com/BlueBrain/dir-content-diff/issues"
3637

38+
[project.scripts]
39+
dir-content-diff = "dir_content_diff.cli:main"
40+
3741
[project.entry-points.pytest11]
3842
dir-content-diff = "dir_content_diff.pytest_plugin"
3943

0 commit comments

Comments
 (0)