Skip to content

Commit cf243a1

Browse files
Fix fix_and_overwrite mode of utils/check_docstrings.py (#39369)
* Fix bug in the fix mode of check_docstrings
1 parent 6902ffa commit cf243a1

File tree

1 file changed

+15
-7
lines changed

1 file changed

+15
-7
lines changed

utils/check_docstrings.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -823,16 +823,19 @@ def match_docstring_with_signature(obj: Any) -> Optional[tuple[str, str]]:
823823
except OSError:
824824
source = []
825825

826+
# Find the line where the docstring starts
826827
idx = 0
827828
while idx < len(source) and '"""' not in source[idx]:
828829
idx += 1
829830

830831
ignore_order = False
831832
if idx < len(source):
832833
line_before_docstring = source[idx - 1]
834+
# Match '# no-format' (allowing surrounding whitespace)
833835
if re.search(r"^\s*#\s*no-format\s*$", line_before_docstring):
834-
# This object is ignored
836+
# This object is ignored by the auto-docstring tool
835837
return
838+
# Match '# ignore-order' (allowing surrounding whitespace)
836839
elif re.search(r"^\s*#\s*ignore-order\s*$", line_before_docstring):
837840
ignore_order = True
838841

@@ -959,14 +962,15 @@ def fix_docstring(obj: Any, old_doc_args: str, new_doc_args: str):
959962
idx -= 1
960963
idx += 1
961964

962-
if "".join(source[start_idx:idx])[:-1] != old_doc_args:
965+
# `old_doc_args` is built from `obj.__doc__`, which may have
966+
# different indentation than the raw source from `inspect.getsourcelines`.
967+
# We use `inspect.cleandoc` to remove indentation uniformly from both
968+
# strings before comparing them.
969+
source_args_as_str = "".join(source[start_idx:idx])
970+
if inspect.cleandoc(source_args_as_str) != inspect.cleandoc(old_doc_args):
963971
# Args are not fully defined in the docstring of this object
964-
# This can happen due to a mismatch in indentation calculation where the docstring parsing
965-
# in match_docstring_with_signature uses obj.__doc__.split("\n") while here we use
966-
# inspect.getsourcelines(obj) which can have different line endings or indentation.
967-
# See https://github.com/huggingface/transformers/pull/38915/files#r2200675302 for more details.
968972
obj_file = find_source_file(obj)
969-
actual_args_section = "".join(source[start_idx:idx])[:-1]
973+
actual_args_section = source_args_as_str.rstrip()
970974
raise ValueError(
971975
f"Cannot fix docstring of {obj.__name__} in {obj_file} because the argument section in the source code "
972976
f"does not match the expected format. This usually happens when:\n"
@@ -983,6 +987,10 @@ def fix_docstring(obj: Any, old_doc_args: str, new_doc_args: str):
983987

984988
# Replace content
985989
lines = content.split("\n")
990+
prev_line_indentation = find_indent(lines[line_number + start_idx - 2])
991+
# Now increase the indentation of every line in new_doc_args by prev_line_indentation
992+
new_doc_args = "\n".join([f"{' ' * prev_line_indentation}{line}" for line in new_doc_args.split("\n")])
993+
986994
lines = lines[: line_number + start_idx - 1] + [new_doc_args] + lines[line_number + idx - 1 :]
987995

988996
print(f"Fixing the docstring of {obj.__name__} in {obj_file}.")

0 commit comments

Comments
 (0)