Skip to content

Commit 2ddd5d4

Browse files
yarikoptic and claude committed
feat: add --max-per-group truncation to dandi validate
Limit how many results are shown per leaf group (or in the flat list when no grouping is used). Excess results are replaced by a TruncationNotice placeholder — a distinct dataclass (not a ValidationResult) so that consumers can detect it with an isinstance() check.

- TruncationNotice dataclass + LeafItem/TruncatedResults type aliases
- _truncate_leaves() walks the grouped tree and caps leaf lists
- Human output: "... and N more issues" in cyan
- Structured output: {"_truncated": true, "omitted_count": N} sentinel
- Headers show original counts, including omitted items
- Works without grouping (flat list) and with multi-level grouping

Co-Authored-By: Claude Code 2.1.63 / Claude Opus 4.6 <noreply@anthropic.com>
1 parent 1100de5 commit 2ddd5d4

File tree

2 files changed

+615
-39
lines changed

2 files changed

+615
-39
lines changed

dandi/cli/cmd_validate.py

Lines changed: 217 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
from __future__ import annotations
22

3+
from collections import OrderedDict
4+
import dataclasses
5+
import json as json_mod
36
import logging
47
import os
58
import re
69
import sys
7-
from typing import IO, cast
10+
from typing import IO, Union, cast
811
import warnings
912

1013
import click
@@ -18,6 +21,14 @@
1821

1922
lgr = logging.getLogger(__name__)
2023

24+
25+
@dataclasses.dataclass
class TruncationNotice:
    """Marker standing in for results that were cut from truncated output.

    It is deliberately a separate type from ``ValidationResult`` so that
    rendering code can tell the two apart with ``isinstance()``.

    Attributes:
        omitted_count: Number of results this notice replaces.
    """

    omitted_count: int
30+
31+
2132
STRUCTURED_FORMATS = ("json", "json_pp", "json_lines", "yaml")
2233

2334
_EXT_TO_FORMAT = {
@@ -131,7 +142,9 @@ def validate_bids(
131142
"`dandi validate` instead. Proceeding to parse the call to `dandi validate` now.",
132143
DeprecationWarning,
133144
)
134-
ctx.invoke(validate, paths=paths, grouping=grouping)
145+
ctx.invoke(
146+
validate, paths=paths, grouping=(grouping,) if grouping != "none" else ()
147+
)
135148

136149

137150
@click.command()
@@ -145,12 +158,13 @@ def validate_bids(
145158
@click.option(
146159
"--grouping",
147160
"-g",
148-
help="How to group error/warning reporting.",
161+
help="How to group output. Repeat for hierarchical nesting, e.g. -g severity -g id.",
149162
type=click.Choice(
150163
["none", "path", "severity", "id", "validator", "standard", "dandiset"],
151164
case_sensitive=False,
152165
),
153-
default="none",
166+
multiple=True,
167+
default=(),
154168
)
155169
@click.option("--ignore", metavar="REGEX", help="Regex matching error IDs to ignore")
156170
@click.option(
@@ -181,6 +195,13 @@ def validate_bids(
181195
help="Show summary statistics.",
182196
default=False,
183197
)
198+
@click.option(
199+
"--max-per-group",
200+
type=int,
201+
default=None,
202+
help="Limit results per group (or total if ungrouped). "
203+
"Excess results are replaced by a count of omitted items.",
204+
)
184205
@click.option(
185206
"--load",
186207
help="Load validation results from JSONL file(s) instead of running validation.",
@@ -196,11 +217,12 @@ def validate(
196217
ctx: click.Context,
197218
paths: tuple[str, ...],
198219
ignore: str | None,
199-
grouping: str,
220+
grouping: tuple[str, ...],
200221
min_severity: str,
201222
output_format: str = "human",
202223
output_file: str | None = None,
203224
summary: bool = False,
225+
max_per_group: int | None = None,
204226
load: tuple[str, ...] = (),
205227
schema: str | None = None,
206228
devel_debug: bool = False,
@@ -210,6 +232,9 @@ def validate(
210232
211233
Exits with non-0 exit code if any file is not compliant.
212234
"""
235+
# Normalize grouping: strip "none" values
236+
grouping = tuple(g for g in grouping if g != "none")
237+
213238
# Auto-detect format from output file extension when --format not given
214239
if output_file is not None and output_format == "human":
215240
detected = _format_from_ext(output_file)
@@ -221,6 +246,13 @@ def validate(
221246
)
222247
output_format = detected
223248

249+
# JSONL is incompatible with grouping (flat format, no nesting)
250+
if grouping and output_format == "json_lines":
251+
raise click.UsageError(
252+
"--grouping is incompatible with json_lines format "
253+
"(JSONL is a flat format that cannot represent nested groups)."
254+
)
255+
224256
if load and paths:
225257
raise click.UsageError("--load and positional paths are mutually exclusive.")
226258

@@ -234,19 +266,31 @@ def validate(
234266
filtered = _filter_results(results, min_severity, ignore)
235267

236268
if output_format == "human":
237-
_render_human(filtered, grouping)
269+
_render_human(filtered, grouping, max_per_group=max_per_group)
238270
if summary:
239271
_print_summary(filtered, sys.stdout)
240272
_exit_if_errors(filtered)
241273
elif output_file is not None:
242274
with open(output_file, "w") as fh:
243-
_render_structured(filtered, output_format, fh)
275+
_render_structured(
276+
filtered,
277+
output_format,
278+
fh,
279+
grouping,
280+
max_per_group=max_per_group,
281+
)
244282
lgr.info("Validation output written to %s", output_file)
245283
if summary:
246284
_print_summary(filtered, sys.stderr)
247285
_exit_if_errors(filtered)
248286
else:
249-
_render_structured(filtered, output_format, sys.stdout)
287+
_render_structured(
288+
filtered,
289+
output_format,
290+
sys.stdout,
291+
grouping,
292+
max_per_group=max_per_group,
293+
)
250294
if summary:
251295
_print_summary(filtered, sys.stderr)
252296
# Auto-save sidecar next to logfile (skip when loading)
@@ -316,12 +360,39 @@ def _render_structured(
316360
results: list[ValidationResult],
317361
output_format: str,
318362
out: IO[str],
363+
grouping: tuple[str, ...] = (),
364+
max_per_group: int | None = None,
319365
) -> None:
320366
"""Render validation results in a structured format."""
321-
formatter = _get_formatter(output_format, out=out)
322-
with formatter:
323-
for r in results:
324-
formatter(r.model_dump(mode="json"))
367+
if grouping:
368+
# Grouped output: build nested dict, serialize directly
369+
grouped: GroupedResults | TruncatedResults = _group_results(results, grouping)
370+
if max_per_group is not None:
371+
grouped = _truncate_leaves(grouped, max_per_group)
372+
data = _serialize_grouped(grouped)
373+
if output_format in ("json", "json_pp"):
374+
indent = 2 if output_format == "json_pp" else None
375+
json_mod.dump(data, out, indent=indent, sort_keys=True, default=str)
376+
out.write("\n")
377+
elif output_format == "yaml":
378+
import ruamel.yaml
379+
380+
yaml = ruamel.yaml.YAML(typ="safe")
381+
yaml.default_flow_style = False
382+
yaml.dump(data, out)
383+
else:
384+
raise ValueError(f"Unsupported format for grouped output: {output_format}")
385+
else:
386+
items: list[dict] = [r.model_dump(mode="json") for r in results]
387+
if max_per_group is not None and len(items) > max_per_group:
388+
items = items[:max_per_group]
389+
items.append(
390+
{"_truncated": True, "omitted_count": len(results) - max_per_group}
391+
)
392+
formatter = _get_formatter(output_format, out=out)
393+
with formatter:
394+
for item in items:
395+
formatter(item)
325396

326397

327398
def _exit_if_errors(results: list[ValidationResult]) -> None:
@@ -348,20 +419,88 @@ def _group_key(issue: ValidationResult, grouping: str) -> str:
348419
raise NotImplementedError(f"Unsupported grouping: {grouping}")
349420

350421

422+
# Recursive grouped type: either a nested OrderedDict or leaf list
423+
GroupedResults = Union["OrderedDict[str, GroupedResults]", list[ValidationResult]]
424+
425+
# Leaf items after possible truncation
426+
LeafItem = Union[ValidationResult, TruncationNotice]
427+
TruncatedResults = Union["OrderedDict[str, TruncatedResults]", list[LeafItem]]
428+
429+
430+
def _group_results(
    results: list[ValidationResult],
    levels: tuple[str, ...],
) -> GroupedResults:
    """Build a nested grouping of *results* following *levels* in order.

    Each entry of *levels* is a grouping name understood by ``_group_key``.
    The first level forms the outermost mapping; every further level nests
    one mapping deeper. The innermost values are lists of ValidationResult.
    Groups appear in the order their key is first encountered.

    When *levels* is empty the input list is returned as-is (same object).
    """
    if not levels:
        return results

    current_level = levels[0]
    deeper_levels = levels[1:]

    # Bucket results by the key of the current level, keeping first-seen order.
    buckets: OrderedDict[str, list[ValidationResult]] = OrderedDict()
    for item in results:
        bucket_name = _group_key(item, current_level)
        if bucket_name not in buckets:
            buckets[bucket_name] = []
        buckets[bucket_name].append(item)

    if not deeper_levels:
        # mypy cannot resolve the recursive alias; a mapping of str to
        # list[ValidationResult] is nevertheless a valid GroupedResults.
        return cast("GroupedResults", buckets)

    nested: OrderedDict = OrderedDict()
    for bucket_name, members in buckets.items():
        nested[bucket_name] = _group_results(members, deeper_levels)
    return nested
452+
453+
454+
def _truncate_leaves(
455+
grouped: GroupedResults | TruncatedResults, max_per_group: int
456+
) -> TruncatedResults:
457+
"""Truncate leaf lists to *max_per_group* items, appending a TruncationNotice."""
458+
if isinstance(grouped, list):
459+
if len(grouped) > max_per_group:
460+
kept: list[LeafItem] = list(grouped[:max_per_group])
461+
kept.append(TruncationNotice(len(grouped) - max_per_group))
462+
return kept
463+
return cast("TruncatedResults", grouped)
464+
return OrderedDict(
465+
(k, _truncate_leaves(v, max_per_group)) for k, v in grouped.items()
466+
)
467+
468+
469+
def _serialize_grouped(grouped: GroupedResults | TruncatedResults) -> dict | list:
    """Turn a grouped structure into plain nested dicts and lists.

    Mappings become dicts with the same keys. In leaf lists, each
    ``TruncationNotice`` becomes the sentinel
    ``{"_truncated": True, "omitted_count": N}`` and every other item is
    dumped with ``model_dump(mode="json")``.
    """
    if not isinstance(grouped, list):
        return {name: _serialize_grouped(subtree) for name, subtree in grouped.items()}

    return [
        (
            {"_truncated": True, "omitted_count": entry.omitted_count}
            if isinstance(entry, TruncationNotice)
            else entry.model_dump(mode="json")
        )
        for entry in grouped
    ]
480+
481+
351482
def _render_human(
352483
issues: list[ValidationResult],
353-
grouping: str,
484+
grouping: tuple[str, ...],
485+
max_per_group: int | None = None,
354486
) -> None:
355487
"""Render validation results in human-readable colored format."""
356-
if grouping == "none":
357-
purviews = [i.purview for i in issues]
488+
if not grouping:
489+
shown = issues
490+
omitted = 0
491+
if max_per_group is not None and len(issues) > max_per_group:
492+
shown = issues[:max_per_group]
493+
omitted = len(issues) - max_per_group
494+
purviews = [i.purview for i in shown]
358495
display_errors(
359496
purviews,
360-
[i.id for i in issues],
361-
cast("list[Severity]", [i.severity for i in issues]),
362-
[i.message for i in issues],
497+
[i.id for i in shown],
498+
cast("list[Severity]", [i.severity for i in shown]),
499+
[i.message for i in shown],
363500
)
364-
elif grouping == "path":
501+
if omitted:
502+
click.secho(f"... and {pluralize(omitted, 'more issue')}", fg="cyan")
503+
elif grouping == ("path",):
365504
# Legacy path grouping: de-duplicate purviews, show per-path
366505
purviews = list(set(i.purview for i in issues))
367506
for purview in purviews:
@@ -373,39 +512,80 @@ def _render_human(
373512
[i.message for i in applies_to],
374513
)
375514
else:
376-
# Generic grouped rendering with section headers
377-
from collections import OrderedDict
515+
grouped: GroupedResults | TruncatedResults = _group_results(issues, grouping)
516+
if max_per_group is not None:
517+
grouped = _truncate_leaves(grouped, max_per_group)
518+
_render_human_grouped(grouped, depth=0)
519+
520+
if not any(r.severity is not None and r.severity >= Severity.ERROR for r in issues):
521+
click.secho("No errors found.", fg="green")
522+
378523

379-
groups: OrderedDict[str, list[ValidationResult]] = OrderedDict()
380-
for issue in issues:
381-
key = _group_key(issue, grouping)
382-
groups.setdefault(key, []).append(issue)
524+
def _count_leaves(grouped: GroupedResults | TruncatedResults) -> int:
    """Return how many results a grouped structure represents.

    Each regular leaf item counts as one; a ``TruncationNotice`` counts as
    its ``omitted_count``, so the total reflects the size before truncation.
    """
    if not isinstance(grouped, list):
        return sum(map(_count_leaves, grouped.values()))

    total = 0
    for entry in grouped:
        if isinstance(entry, TruncationNotice):
            total += entry.omitted_count
        else:
            total += 1
    return total
383532

384-
for key, group_issues in groups.items():
385-
header = f"=== {key} ({pluralize(len(group_issues), 'issue')}) ==="
533+
534+
def _render_human_grouped(
    grouped: GroupedResults | TruncatedResults,
    depth: int,
) -> None:
    """Print a grouped structure as nested section headers followed by issues.

    Every line is prefixed with one indent unit per *depth*. Mappings print
    a bold header per key showing the count from ``_count_leaves`` and then
    recurse one level deeper. Leaf lists print one line per issue; a
    ``TruncationNotice`` prints a cyan "... and N more" line instead.
    """
    pad = " " * depth

    if not isinstance(grouped, list):
        for name, subtree in grouped.items():
            total = _count_leaves(subtree)
            title = f"{pad}=== {name} ({pluralize(total, 'issue')}) ==="
            # The header colour comes from the severities of the issues that
            # are actually present in this subtree.
            severities = [
                entry.severity
                for entry in _collect_all_issues(subtree)
                if entry.severity is not None
            ]
            colour = _get_severity_color(cast("list[Severity]", severities))
            click.secho(title, fg=colour, bold=True)
            _render_human_grouped(subtree, depth + 1)
        return

    # Leaf level: one line per issue or truncation notice.
    for entry in grouped:
        if isinstance(entry, TruncationNotice):
            click.secho(
                f"{pad}... and {pluralize(entry.omitted_count, 'more issue')}",
                fg="cyan",
            )
        else:
            line = f"{pad}[{entry.id}] {entry.purview}{entry.message}"
            own_severity = [entry.severity] if entry.severity is not None else []
            click.secho(line, fg=_get_severity_color(own_severity))
399568

400-
if not any(r.severity is not None and r.severity >= Severity.ERROR for r in issues):
401-
click.secho("No errors found.", fg="green")
569+
570+
def _collect_all_issues(
    grouped: GroupedResults | TruncatedResults,
) -> list[ValidationResult]:
    """Return every ValidationResult in a grouped structure as one flat list.

    Items that are not ValidationResult instances (such as truncation
    notices) are skipped. Order follows the traversal order of the structure.
    """
    if not isinstance(grouped, list):
        return [
            issue
            for subtree in grouped.values()
            for issue in _collect_all_issues(subtree)
        ]
    return [entry for entry in grouped if isinstance(entry, ValidationResult)]
402580

403581

404582
def _process_issues(
    issues: list[ValidationResult],
    grouping: str | tuple[str, ...],
) -> None:
    """Legacy wrapper: render human output and exit if errors.

    Args:
        issues: Validation results to display.
        grouping: A single grouping name (legacy string form) or a tuple of
            grouping levels. The value "none" means "no grouping" and is
            dropped in either form.

    Renders with ``_render_human`` and then delegates to ``_exit_if_errors``.
    """
    if isinstance(grouping, str):
        grouping = (grouping,)
    # Strip the "none" sentinel in both forms, mirroring the normalisation
    # done in validate(), so it never reaches the renderer as a group name.
    levels = tuple(level for level in grouping if level != "none")
    _render_human(issues, levels)
    _exit_if_errors(issues)
411591

0 commit comments

Comments
 (0)