-
Notifications
You must be signed in to change notification settings - Fork 6.5k
[Utils] add utilities for checking if certain utilities are properly documented #7763
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 36 commits
Commits
Show all changes
37 commits
Select commit
Hold shift + click to select a range
25033da
add; utility to check if attn_procs,norms,acts are properly documented.
sayakpaul 9398e0f
add support listing to the workflows.
sayakpaul 132c68b
Merge branch 'main' into feat/check-doc-listing
sayakpaul 57ca5be
change to 2024.
sayakpaul 8532285
Merge branch 'main' into feat/check-doc-listing
sayakpaul b5c9aeb
small fixes.
sayakpaul 40128ac
Merge branch 'main' into feat/check-doc-listing
sayakpaul c625166
does adding detailed docstrings help?
sayakpaul 8b58696
Merge branch 'main' into feat/check-doc-listing
sayakpaul 80d0a7f
Merge branch 'main' into feat/check-doc-listing
sayakpaul d064b11
Merge branch 'main' into feat/check-doc-listing
sayakpaul 45daa98
Merge branch 'main' into feat/check-doc-listing
sayakpaul 5663ba5
fix
sayakpaul 0653e2d
Merge branch 'main' into feat/check-doc-listing
sayakpaul dac63dd
uncomment image processor check
sayakpaul 900cd1c
quality
sayakpaul 6dc3d19
Merge branch 'main' into feat/check-doc-listing
sayakpaul 8449186
fix, thanks to @mishig.
sayakpaul af2370b
Apply suggestions from code review
sayakpaul c4c9fc4
Merge branch 'main' into feat/check-doc-listing
sayakpaul 15b2f57
style
sayakpaul 12c9ac4
Merge branch 'main' into feat/check-doc-listing
sayakpaul f3443d0
Merge branch 'main' into feat/check-doc-listing
sayakpaul b8b0fd1
Merge branch 'main' into feat/check-doc-listing
sayakpaul 63989af
resolve conflicts.
sayakpaul 4227392
JointAttnProcessor2_0
sayakpaul 0034db2
fixes
sayakpaul eb5a8b2
resolve conflicts.
sayakpaul a2aa752
fixes
sayakpaul 005a2e9
fixes
sayakpaul b653eaa
fixes
sayakpaul 75136e6
fixes
sayakpaul 7eb617a
fixes
sayakpaul 80be186
Merge branch 'main' into feat/check-doc-listing
sayakpaul ef03777
Merge branch 'main' into feat/check-doc-listing
sayakpaul 53a3361
fixes
sayakpaul be989a6
Update docs/source/en/api/normalization.md
sayakpaul File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,68 @@ | ||
| import os | ||
| import sys | ||
| import unittest | ||
| from unittest.mock import mock_open, patch | ||
|
|
||
|
|
||
| git_repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))) | ||
| sys.path.append(os.path.join(git_repo_path, "utils")) | ||
|
|
||
| from check_support_list import check_documentation # noqa: E402 | ||
|
|
||
|
|
||
class TestCheckSupportList(unittest.TestCase):
    """Exercise `check_documentation` against in-memory documentation and source files."""

    # Patterns handed to `check_documentation` by every test case.
    DOC_REGEX = r"\[\[autodoc\]\]\s([^\n]+)"
    SRC_REGEX = r"class\s+(\w+Processor)\(.*?nn\.Module.*?\):"

    def setUp(self):
        # Mock doc and source contents that we can reuse
        self.doc_content = (
            "# Documentation\n"
            "## FooProcessor\n"
            "\n"
            "[[autodoc]] module.FooProcessor\n"
            "\n"
            "## BarProcessor\n"
            "\n"
            "[[autodoc]] module.BarProcessor\n"
        )
        self.source_content = (
            "\n"
            "class FooProcessor(nn.Module):\n"
            "    pass\n"
            "\n"
            "class BarProcessor(nn.Module):\n"
            "    pass\n"
        )

    def _find_undocumented(self, doc_text):
        """Run `check_documentation` with `open` patched to serve `doc_text` and the fake source."""
        # The first call to open() receives the doc handle and the second one the
        # source handle, so the order of this list matters.
        fake_handles = [
            mock_open(read_data=doc_text).return_value,
            mock_open(read_data=self.source_content).return_value,
        ]
        with patch("builtins.open", mock_open(read_data=doc_text)) as fake_open:
            fake_open.side_effect = fake_handles
            return check_documentation(
                doc_path="fake_doc.md",
                src_path="fake_source.py",
                doc_regex=self.DOC_REGEX,
                src_regex=self.SRC_REGEX,
            )

    def test_check_documentation_all_documented(self):
        # In this test, both FooProcessor and BarProcessor are documented
        undocumented = self._find_undocumented(self.doc_content)
        self.assertEqual(len(undocumented), 0, f"Expected no undocumented classes, got {undocumented}")

    def test_check_documentation_missing_class(self):
        # In this test, only FooProcessor is documented, but BarProcessor is missing from the docs
        doc_content_missing = (
            "# Documentation\n"
            "## FooProcessor\n"
            "\n"
            "[[autodoc]] module.FooProcessor\n"
        )
        undocumented = self._find_undocumented(doc_content_missing)
        self.assertIn("BarProcessor", undocumented, f"BarProcessor should be undocumented, got {undocumented}")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,124 @@ | ||
| # coding=utf-8 | ||
| # Copyright 2024 The HuggingFace Inc. team. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| """ | ||
| Utility that checks that modules like attention processors are listed in the documentation file. | ||
|
|
||
| ```bash | ||
| python utils/check_support_list.py | ||
| ``` | ||
|
|
||
| It has no auto-fix mode. | ||
| """ | ||
|
|
||
| import os | ||
| import re | ||
|
|
||
|
|
||
| # All paths are set with the intent that you run this script from the root of the repo | ||
| REPO_PATH = "." | ||
|
|
||
|
|
||
def read_documented_classes(doc_path, autodoc_regex=r"\[\[autodoc\]\]\s([^\n]+)"):
    """
    Reads documented classes from a doc file using a regex to find lines like [[autodoc]] my.module.Class.

    Args:
        doc_path: Path of the documentation file; it is joined onto `REPO_PATH`.
        autodoc_regex: Pattern whose capture group is the dotted path that follows `[[autodoc]]`.

    Returns:
        A list with the last dotted component of every match (just the class name portion), in the order the
        matches appear in the file.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the platform's locale encoding.
    with open(os.path.join(REPO_PATH, doc_path), "r", encoding="utf-8") as f:
        doctext = f.read()
    matches = re.findall(autodoc_regex, doctext)
    # The default pattern captures everything up to the end of the line, so trailing whitespace would
    # otherwise end up inside the class name and make a documented class look undocumented.
    return [match.strip().split(".")[-1] for match in matches]
|
|
||
|
|
||
def read_source_classes(src_path, class_regex, exclude_conditions=None):
    """
    Reads class names from a source file using a regex that captures class definitions.

    Args:
        src_path: Path of the source file; it is joined onto `REPO_PATH`.
        class_regex: Pattern passed to `re.findall`; its capture group is the class name.
        exclude_conditions: Optional list of predicates that take a class name and return a bool. A class is
            dropped as soon as any predicate returns a truthy value. `None` means "exclude nothing".

    Returns:
        The list of matched class names that no exclude condition rejected, in file order.
    """
    if exclude_conditions is None:
        exclude_conditions = []
    # Decode explicitly as UTF-8 so the result does not depend on the platform's locale encoding.
    with open(os.path.join(REPO_PATH, src_path), "r", encoding="utf-8") as f:
        source_text = f.read()
    candidates = re.findall(class_regex, source_text)
    # Filter out classes that meet any of the exclude conditions
    return [name for name in candidates if not any(cond(name) for cond in exclude_conditions)]
|
|
||
|
|
||
def check_documentation(doc_path, src_path, doc_regex, src_regex, exclude_conditions=None):
    """
    Generic function to check if all classes defined in `src_path` are documented in `doc_path`.

    Args:
        doc_path: Documentation file, forwarded to `read_documented_classes`.
        src_path: Source file, forwarded to `read_source_classes`.
        doc_regex: Pattern used to find documented names in `doc_path`.
        src_regex: Pattern used to find class definitions in `src_path`.
        exclude_conditions: Optional predicates forwarded to `read_source_classes`.

    Returns:
        An alphabetically sorted list of the class names found in the source file but not in the documentation.
        The list is empty when everything is documented.
    """
    # The documentation file is read before the source file; keep this order, callers that substitute
    # `open` with a sequence of fake handles depend on it.
    documented = set(read_documented_classes(doc_path, doc_regex))
    source_classes = set(read_source_classes(src_path, src_regex, exclude_conditions=exclude_conditions))

    # Sorting turns the set difference into a deterministic, reproducible result.
    return sorted(source_classes - documented)
|
|
||
|
|
||
if __name__ == "__main__":
    # Patterns shared by several checks, spelled out once.
    autodoc_line = r"\[\[autodoc\]\]\s([^\n]+)"
    processor_class = r"class\s+(\w+Processor(?:\d*_?\d*))[:(]"
    nn_module_subclass = r"class\s+(\w+)\s*\(.*?nn\.Module.*?\):"

    # One entry per documentation page to verify: where the docs live, where the
    # classes are defined, how to recognise both, and which classes to ignore.
    checks = {
        "Attention Processors": {
            "doc_path": "docs/source/en/api/attnprocessor.md",
            "src_path": "src/diffusers/models/attention_processor.py",
            "doc_regex": autodoc_line,
            "src_regex": processor_class,
            "exclude_conditions": [lambda c: "LoRA" in c, lambda c: c == "Attention"],
        },
        "Image Processors": {
            "doc_path": "docs/source/en/api/image_processor.md",
            "src_path": "src/diffusers/image_processor.py",
            "doc_regex": autodoc_line,
            "src_regex": processor_class,
        },
        "Activations": {
            "doc_path": "docs/source/en/api/activations.md",
            "src_path": "src/diffusers/models/activations.py",
            "doc_regex": autodoc_line,
            "src_regex": nn_module_subclass,
        },
        "Normalizations": {
            "doc_path": "docs/source/en/api/normalization.md",
            "src_path": "src/diffusers/models/normalization.py",
            "doc_regex": autodoc_line,
            "src_regex": nn_module_subclass,
            "exclude_conditions": [
                # Exclude LayerNorm as it's an intentional exception
                lambda c: c == "LayerNorm"
            ],
        },
        # NOTE(review): this pattern only matches classes that list `nn.Module` among
        # their bases. Confirm the classes in lora_pipeline.py actually do; if they
        # do not, this check finds no classes and passes without verifying anything.
        "LoRA Mixins": {
            "doc_path": "docs/source/en/api/loaders/lora.md",
            "src_path": "src/diffusers/loaders/lora_pipeline.py",
            "doc_regex": autodoc_line,
            "src_regex": nn_module_subclass,
        },
    }

    # Run every check before reporting so one failure lists all gaps at once.
    missing_items = {}
    for category, params in checks.items():
        undocumented = check_documentation(
            params["doc_path"],
            params["src_path"],
            params["doc_regex"],
            params["src_regex"],
            exclude_conditions=params.get("exclude_conditions"),
        )
        if undocumented:
            missing_items[category] = undocumented

    # If we have any missing items, raise a single combined error
    if missing_items:
        report = ["Some classes are not documented properly:\n"]
        report.extend(f"- {category}: {', '.join(sorted(classes))}" for category, classes in missing_items.items())
        raise ValueError("\n".join(report))
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.