From 3fa0662de45295739ab34827ff0a9eed3b049876 Mon Sep 17 00:00:00 2001
From: Matt Linville <matt.linville@wandb.com>
Date: Mon, 20 Oct 2025 12:49:39 -0700
Subject: [PATCH 1/3] fix: fix URL parsing in docstrings

Fixes #95

- Restrict _RE_ARGSTART to identifier-like names (word characters and
  square brackets) so that URLs no longer match as argument names
- Add heuristics to detect non-argument lines: URL markers ("http",
  "://"), names longer than 40 characters, and common descriptive words
- Prevent such lines from being incorrectly parsed as arguments

This fixes the issue where URLs in docstrings were being split
and wrapped with incorrect HTML tags.
---
 src/lazydocs/generation.py | 41 +++++++++++++++++++++++++++++++++-----
 1 file changed, 36 insertions(+), 5 deletions(-)
diff --git a/src/lazydocs/generation.py b/src/lazydocs/generation.py
index 7013dd9..1bc0ce9 100755
--- a/src/lazydocs/generation.py
+++ b/src/lazydocs/generation.py
@@ -33,7 +33,8 @@
 )
 
 _RE_TYPED_ARGSTART = re.compile(r"^([\w\[\]_]{1,}?)[ ]*?\((.*?)\):[ ]+(.{2,})", re.IGNORECASE)
-_RE_ARGSTART = re.compile(r"^(.+):[ ]+(.{2,})$", re.IGNORECASE)
+# Restrict to valid Python identifier-like patterns to avoid matching URLs
+_RE_ARGSTART = re.compile(r"^([\w\[\]_]+):[ ]+(.{2,})$", re.IGNORECASE)
 
 _RE_CODE_TEXT = re.compile(r"^```[\w\-\.]*[ ]*$", re.IGNORECASE)
 
@@ -583,11 +584,41 @@ def _lines_isvalid(lines: list, start_index: int, blockindent: int,
                 argindent = indent
             elif arg_list and not literal_block and _RE_ARGSTART.match(line):
                 # start of an exception-type block
-                out.append(
-                    "- "
-                    + _RE_ARGSTART.sub(r"<b>`\1`</b>: \2", line)
+                # Check if this looks like a URL being incorrectly parsed
+                match = _RE_ARGSTART.match(line)
+                # Check if the part before the colon contains URL indicators or 
+                # is likely descriptive text rather than an argument name
+                before_colon = match.group(1) if match else ""
+                after_colon = match.group(2) if match else ""
+                
+                # Heuristics to detect non-argument lines:
+                # 1. The text before colon contains "http" (part of a URL)
+                # 2. The line contains "://" (URL protocol)
+                # 3. The text before colon is too long to be an argument name (>40 chars)
+                # 4. The text before colon contains common English words that aren't argument names
+                is_not_argument = (
+                    "http" in before_colon.lower() or
+                    "://" in line or
+                    len(before_colon) > 40 or
+                    # Check for common descriptive phrases (without trailing space)
+                    any(word in before_colon.lower() for word in ["see", "to find", "refer", "documentation", "available"])
                 )
-                argindent = indent
+                
+                if match and is_not_argument:
+                    # This is likely descriptive text with a colon, not an argument
+                    # Treat it as regular text continuation
+                    if argindent > 0:
+                        padding = max(indent - argindent + offset, 0)
+                        out.append(" " * padding + line.replace("\n", "\n" + " " * padding))
+                    else:
+                        out.append(line)
+                else:
+                    # This is a real argument
+                    out.append(
+                        "- "
+                        + _RE_ARGSTART.sub(r"<b>`\1`</b>: \2", line)
+                    )
+                    argindent = indent
             elif indent > argindent:
                 # attach docs text of argument
                 # * (blockindent + 2)

From 7226bd220738a301f39534745b16b20612759929 Mon Sep 17 00:00:00 2001
From: Matt Linville <matt.linville@wandb.com>
Date: Mon, 20 Oct 2025 12:58:42 -0700
Subject: [PATCH 2/3] fix: address code quality issues

- Remove unused variable 'after_colon'
- Extract common_words list to avoid long line
- Improve code readability
---
 src/lazydocs/generation.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/lazydocs/generation.py b/src/lazydocs/generation.py
index 1bc0ce9..2da18af 100755
--- a/src/lazydocs/generation.py
+++ b/src/lazydocs/generation.py
@@ -589,19 +589,18 @@ def _lines_isvalid(lines: list, start_index: int, blockindent: int,
                 # Check if the part before the colon contains URL indicators or 
                 # is likely descriptive text rather than an argument name
                 before_colon = match.group(1) if match else ""
-                after_colon = match.group(2) if match else ""
                 
                 # Heuristics to detect non-argument lines:
                 # 1. The text before colon contains "http" (part of a URL)
                 # 2. The line contains "://" (URL protocol)
                 # 3. The text before colon is too long to be an argument name (>40 chars)
                 # 4. The text before colon contains common English words that aren't argument names
+                common_words = ["see", "to find", "refer", "documentation", "available"]
                 is_not_argument = (
                     "http" in before_colon.lower() or
                     "://" in line or
                     len(before_colon) > 40 or
-                    # Check for common descriptive phrases (without trailing space)
-                    any(word in before_colon.lower() for word in ["see", "to find", "refer", "documentation", "available"])
+                    any(word in before_colon.lower() for word in common_words)
                 )
                 
                 if match and is_not_argument:

From 58126142f056d502b49f5165ee6c58c0d9ee49cd Mon Sep 17 00:00:00 2001
From: Matt Linville <matt.linville@wandb.com>
Date: Mon, 20 Oct 2025 13:00:57 -0700
Subject: [PATCH 3/3] fix: remove trailing whitespace

- Remove trailing whitespace from the comment on line 589 of
  src/lazydocs/generation.py
---
 src/lazydocs/generation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lazydocs/generation.py b/src/lazydocs/generation.py
index 2da18af..c9d00a9 100755
--- a/src/lazydocs/generation.py
+++ b/src/lazydocs/generation.py
@@ -586,7 +586,7 @@ def _lines_isvalid(lines: list, start_index: int, blockindent: int,
                 # start of an exception-type block
                 # Check if this looks like a URL being incorrectly parsed
                 match = _RE_ARGSTART.match(line)
-                # Check if the part before the colon contains URL indicators or 
+                # Check if the part before the colon contains URL indicators or
                 # is likely descriptive text rather than an argument name
                 before_colon = match.group(1) if match else ""