11from __future__ import annotations
22
3+ from collections import OrderedDict
4+ import dataclasses
5+ import json as json_mod
36import logging
47import os
58import re
69import sys
7- from typing import IO , cast
10+ from typing import IO , Union , cast
811import warnings
912
1013import click
1821
1922lgr = logging .getLogger (__name__ )
2023
24+
@dataclasses.dataclass
class TruncationNotice:
    """Marker that stands in for results left out of truncated output.

    Instances are placed at the end of a shortened list of results so that
    renderers can report how many entries were dropped.
    """

    # Number of results that were omitted from the list.
    omitted_count: int
30+
31+
2132STRUCTURED_FORMATS = ("json" , "json_pp" , "json_lines" , "yaml" )
2233
2334_EXT_TO_FORMAT = {
@@ -131,7 +142,9 @@ def validate_bids(
131142 "`dandi validate` instead. Proceeding to parse the call to `dandi validate` now." ,
132143 DeprecationWarning ,
133144 )
134- ctx .invoke (validate , paths = paths , grouping = grouping )
145+ ctx .invoke (
146+ validate , paths = paths , grouping = (grouping ,) if grouping != "none" else ()
147+ )
135148
136149
137150@click .command ()
@@ -145,12 +158,13 @@ def validate_bids(
145158@click .option (
146159 "--grouping" ,
147160 "-g" ,
148- help = "How to group error/warning reporting ." ,
161+ help = "How to group output. Repeat for hierarchical nesting, e.g. -g severity -g id ." ,
149162 type = click .Choice (
150163 ["none" , "path" , "severity" , "id" , "validator" , "standard" , "dandiset" ],
151164 case_sensitive = False ,
152165 ),
153- default = "none" ,
166+ multiple = True ,
167+ default = (),
154168)
155169@click .option ("--ignore" , metavar = "REGEX" , help = "Regex matching error IDs to ignore" )
156170@click .option (
@@ -181,6 +195,13 @@ def validate_bids(
181195 help = "Show summary statistics." ,
182196 default = False ,
183197)
198+ @click .option (
199+ "--max-per-group" ,
200+ type = int ,
201+ default = None ,
202+ help = "Limit results per group (or total if ungrouped). "
203+ "Excess results are replaced by a count of omitted items." ,
204+ )
184205@click .option (
185206 "--load" ,
186207 help = "Load validation results from JSONL file(s) instead of running validation." ,
@@ -196,11 +217,12 @@ def validate(
196217 ctx : click .Context ,
197218 paths : tuple [str , ...],
198219 ignore : str | None ,
199- grouping : str ,
220+ grouping : tuple [ str , ...] ,
200221 min_severity : str ,
201222 output_format : str = "human" ,
202223 output_file : str | None = None ,
203224 summary : bool = False ,
225+ max_per_group : int | None = None ,
204226 load : tuple [str , ...] = (),
205227 schema : str | None = None ,
206228 devel_debug : bool = False ,
@@ -210,6 +232,9 @@ def validate(
210232
211233 Exits with non-0 exit code if any file is not compliant.
212234 """
235+ # Normalize grouping: strip "none" values
236+ grouping = tuple (g for g in grouping if g != "none" )
237+
213238 # Auto-detect format from output file extension when --format not given
214239 if output_file is not None and output_format == "human" :
215240 detected = _format_from_ext (output_file )
@@ -221,6 +246,13 @@ def validate(
221246 )
222247 output_format = detected
223248
249+ # JSONL is incompatible with grouping (flat format, no nesting)
250+ if grouping and output_format == "json_lines" :
251+ raise click .UsageError (
252+ "--grouping is incompatible with json_lines format "
253+ "(JSONL is a flat format that cannot represent nested groups)."
254+ )
255+
224256 if load and paths :
225257 raise click .UsageError ("--load and positional paths are mutually exclusive." )
226258
@@ -234,19 +266,31 @@ def validate(
234266 filtered = _filter_results (results , min_severity , ignore )
235267
236268 if output_format == "human" :
237- _render_human (filtered , grouping )
269+ _render_human (filtered , grouping , max_per_group = max_per_group )
238270 if summary :
239271 _print_summary (filtered , sys .stdout )
240272 _exit_if_errors (filtered )
241273 elif output_file is not None :
242274 with open (output_file , "w" ) as fh :
243- _render_structured (filtered , output_format , fh )
275+ _render_structured (
276+ filtered ,
277+ output_format ,
278+ fh ,
279+ grouping ,
280+ max_per_group = max_per_group ,
281+ )
244282 lgr .info ("Validation output written to %s" , output_file )
245283 if summary :
246284 _print_summary (filtered , sys .stderr )
247285 _exit_if_errors (filtered )
248286 else :
249- _render_structured (filtered , output_format , sys .stdout )
287+ _render_structured (
288+ filtered ,
289+ output_format ,
290+ sys .stdout ,
291+ grouping ,
292+ max_per_group = max_per_group ,
293+ )
250294 if summary :
251295 _print_summary (filtered , sys .stderr )
252296 # Auto-save sidecar next to logfile (skip when loading)
@@ -316,12 +360,39 @@ def _render_structured(
316360 results : list [ValidationResult ],
317361 output_format : str ,
318362 out : IO [str ],
363+ grouping : tuple [str , ...] = (),
364+ max_per_group : int | None = None ,
319365) -> None :
320366 """Render validation results in a structured format."""
321- formatter = _get_formatter (output_format , out = out )
322- with formatter :
323- for r in results :
324- formatter (r .model_dump (mode = "json" ))
367+ if grouping :
368+ # Grouped output: build nested dict, serialize directly
369+ grouped : GroupedResults | TruncatedResults = _group_results (results , grouping )
370+ if max_per_group is not None :
371+ grouped = _truncate_leaves (grouped , max_per_group )
372+ data = _serialize_grouped (grouped )
373+ if output_format in ("json" , "json_pp" ):
374+ indent = 2 if output_format == "json_pp" else None
375+ json_mod .dump (data , out , indent = indent , sort_keys = True , default = str )
376+ out .write ("\n " )
377+ elif output_format == "yaml" :
378+ import ruamel .yaml
379+
380+ yaml = ruamel .yaml .YAML (typ = "safe" )
381+ yaml .default_flow_style = False
382+ yaml .dump (data , out )
383+ else :
384+ raise ValueError (f"Unsupported format for grouped output: { output_format } " )
385+ else :
386+ items : list [dict ] = [r .model_dump (mode = "json" ) for r in results ]
387+ if max_per_group is not None and len (items ) > max_per_group :
388+ items = items [:max_per_group ]
389+ items .append (
390+ {"_truncated" : True , "omitted_count" : len (results ) - max_per_group }
391+ )
392+ formatter = _get_formatter (output_format , out = out )
393+ with formatter :
394+ for item in items :
395+ formatter (item )
325396
326397
327398def _exit_if_errors (results : list [ValidationResult ]) -> None :
@@ -348,20 +419,88 @@ def _group_key(issue: ValidationResult, grouping: str) -> str:
348419 raise NotImplementedError (f"Unsupported grouping: { grouping } " )
349420
350421
422+ # Recursive grouped type: either a nested OrderedDict or leaf list
423+ GroupedResults = Union ["OrderedDict[str, GroupedResults]" , list [ValidationResult ]]
424+
425+ # Leaf items after possible truncation
426+ LeafItem = Union [ValidationResult , TruncationNotice ]
427+ TruncatedResults = Union ["OrderedDict[str, TruncatedResults]" , list [LeafItem ]]
428+
429+
430+ def _group_results (
431+ results : list [ValidationResult ],
432+ levels : tuple [str , ...],
433+ ) -> GroupedResults :
434+ """Group results recursively by the given hierarchy of grouping levels.
435+
436+ Returns a nested OrderedDict with leaf values as lists of ValidationResult.
437+ With zero levels, returns the flat list unchanged.
438+ """
439+ if not levels :
440+ return results
441+ key_fn = levels [0 ]
442+ remaining = levels [1 :]
443+ groups : OrderedDict [str , list [ValidationResult ]] = OrderedDict ()
444+ for r in results :
445+ k = _group_key (r , key_fn )
446+ groups .setdefault (k , []).append (r )
447+ if remaining :
448+ return OrderedDict ((k , _group_results (v , remaining )) for k , v in groups .items ())
449+ # mypy can't resolve the recursive type alias, but this is correct:
450+ # OrderedDict[str, list[VR]] is a valid GroupedResults
451+ return cast ("GroupedResults" , groups )
452+
453+
454+ def _truncate_leaves (
455+ grouped : GroupedResults | TruncatedResults , max_per_group : int
456+ ) -> TruncatedResults :
457+ """Truncate leaf lists to *max_per_group* items, appending a TruncationNotice."""
458+ if isinstance (grouped , list ):
459+ if len (grouped ) > max_per_group :
460+ kept : list [LeafItem ] = list (grouped [:max_per_group ])
461+ kept .append (TruncationNotice (len (grouped ) - max_per_group ))
462+ return kept
463+ return cast ("TruncatedResults" , grouped )
464+ return OrderedDict (
465+ (k , _truncate_leaves (v , max_per_group )) for k , v in grouped .items ()
466+ )
467+
468+
469+ def _serialize_grouped (grouped : GroupedResults | TruncatedResults ) -> dict | list :
470+ """Convert grouped results to a JSON-serializable nested dict/list."""
471+ if isinstance (grouped , list ):
472+ result : list [dict ] = []
473+ for item in grouped :
474+ if isinstance (item , TruncationNotice ):
475+ result .append ({"_truncated" : True , "omitted_count" : item .omitted_count })
476+ else :
477+ result .append (item .model_dump (mode = "json" ))
478+ return result
479+ return {k : _serialize_grouped (v ) for k , v in grouped .items ()}
480+
481+
351482def _render_human (
352483 issues : list [ValidationResult ],
353- grouping : str ,
484+ grouping : tuple [str , ...],
485+ max_per_group : int | None = None ,
354486) -> None :
355487 """Render validation results in human-readable colored format."""
356- if grouping == "none" :
357- purviews = [i .purview for i in issues ]
488+ if not grouping :
489+ shown = issues
490+ omitted = 0
491+ if max_per_group is not None and len (issues ) > max_per_group :
492+ shown = issues [:max_per_group ]
493+ omitted = len (issues ) - max_per_group
494+ purviews = [i .purview for i in shown ]
358495 display_errors (
359496 purviews ,
360- [i .id for i in issues ],
361- cast ("list[Severity]" , [i .severity for i in issues ]),
362- [i .message for i in issues ],
497+ [i .id for i in shown ],
498+ cast ("list[Severity]" , [i .severity for i in shown ]),
499+ [i .message for i in shown ],
363500 )
364- elif grouping == "path" :
501+ if omitted :
502+ click .secho (f"... and { pluralize (omitted , 'more issue' )} " , fg = "cyan" )
503+ elif grouping == ("path" ,):
365504 # Legacy path grouping: de-duplicate purviews, show per-path
366505 purviews = list (set (i .purview for i in issues ))
367506 for purview in purviews :
@@ -373,39 +512,80 @@ def _render_human(
373512 [i .message for i in applies_to ],
374513 )
375514 else :
376- # Generic grouped rendering with section headers
377- from collections import OrderedDict
515+ grouped : GroupedResults | TruncatedResults = _group_results (issues , grouping )
516+ if max_per_group is not None :
517+ grouped = _truncate_leaves (grouped , max_per_group )
518+ _render_human_grouped (grouped , depth = 0 )
519+
520+ if not any (r .severity is not None and r .severity >= Severity .ERROR for r in issues ):
521+ click .secho ("No errors found." , fg = "green" )
522+
378523
379- groups : OrderedDict [str , list [ValidationResult ]] = OrderedDict ()
380- for issue in issues :
381- key = _group_key (issue , grouping )
382- groups .setdefault (key , []).append (issue )
524+ def _count_leaves (grouped : GroupedResults | TruncatedResults ) -> int :
525+ """Count total items in a grouped structure (including omitted counts)."""
526+ if isinstance (grouped , list ):
527+ return sum (
528+ item .omitted_count if isinstance (item , TruncationNotice ) else 1
529+ for item in grouped
530+ )
531+ return sum (_count_leaves (v ) for v in grouped .values ())
383532
384- for key , group_issues in groups .items ():
385- header = f"=== { key } ({ pluralize (len (group_issues ), 'issue' )} ) ==="
533+
534+ def _render_human_grouped (
535+ grouped : GroupedResults | TruncatedResults ,
536+ depth : int ,
537+ ) -> None :
538+ """Recursively render grouped results with nested indented section headers."""
539+ indent = " " * depth
540+ if isinstance (grouped , list ):
541+ # Leaf level: render individual issues
542+ for issue in grouped :
543+ if isinstance (issue , TruncationNotice ):
544+ click .secho (
545+ f"{ indent } ... and { pluralize (issue .omitted_count , 'more issue' )} " ,
546+ fg = "cyan" ,
547+ )
548+ continue
549+ msg = f"{ indent } [{ issue .id } ] { issue .purview } — { issue .message } "
550+ fg = _get_severity_color (
551+ [issue .severity ] if issue .severity is not None else []
552+ )
553+ click .secho (msg , fg = fg )
554+ else :
555+ for key , value in grouped .items ():
556+ count = _count_leaves (value )
557+ header = f"{ indent } === { key } ({ pluralize (count , 'issue' )} ) ==="
558+ # Determine color from all issues in this group
559+ all_issues = _collect_all_issues (value )
386560 fg = _get_severity_color (
387561 cast (
388562 "list[Severity]" ,
389- [i .severity for i in group_issues if i .severity is not None ],
563+ [i .severity for i in all_issues if i .severity is not None ],
390564 )
391565 )
392566 click .secho (header , fg = fg , bold = True )
393- for issue in group_issues :
394- msg = f" [{ issue .id } ] { issue .purview } — { issue .message } "
395- ifg = _get_severity_color (
396- [issue .severity ] if issue .severity is not None else []
397- )
398- click .secho (msg , fg = ifg )
567+ _render_human_grouped (value , depth + 1 )
399568
400- if not any (r .severity is not None and r .severity >= Severity .ERROR for r in issues ):
401- click .secho ("No errors found." , fg = "green" )
569+
570+ def _collect_all_issues (
571+ grouped : GroupedResults | TruncatedResults ,
572+ ) -> list [ValidationResult ]:
573+ """Flatten a grouped structure into a list of all ValidationResults."""
574+ if isinstance (grouped , list ):
575+ return [item for item in grouped if isinstance (item , ValidationResult )]
576+ result : list [ValidationResult ] = []
577+ for v in grouped .values ():
578+ result .extend (_collect_all_issues (v ))
579+ return result
402580
403581
def _process_issues(
    issues: list[ValidationResult],
    grouping: str | tuple[str, ...],
) -> None:
    """Legacy entry point: print *issues* for humans, then run the exit check.

    *grouping* may still be given as a single string, as older callers did.
    ``"none"`` maps to no grouping and any other string becomes a one-level
    grouping.  A tuple is passed through unchanged.
    """
    if isinstance(grouping, str):
        levels: tuple[str, ...] = () if grouping == "none" else (grouping,)
    else:
        levels = grouping
    _render_human(issues, levels)
    _exit_if_errors(issues)
411591
0 commit comments