|
119 | 119 | "use-cases", |
120 | 120 | ) |
121 | 121 |
|
# Anchor texts treated as non-descriptive. check_link_quality compares the
# normalized (lowercased, whitespace-collapsed, punctuation-trimmed) link text
# against this set, so entries must be stored in that normalized form.
GENERIC_LINK_ANCHORS = {
    "click here",
    "documentation",
    "docs",
    "here",
    "learn more",
    "link",
    "more",
    "page",
    "read more",
    "this article",
    "this documentation",
    "this guide",
    "this link",
    "this page",
    "website",
}

# VideoEmbed titles treated as too generic. _is_generic_video_title compares
# the lowercased, punctuation-trimmed title against this set.
GENERIC_VIDEO_TITLES = {
    "demo",
    "demo video",
    "overview video",
    "tutorial video",
    "video",
    "walkthrough video",
}

# Matches link text that starts like a URL: an http(s) scheme, a "www."
# prefix, or a dotted hostname ending in one of the listed TLDs that is
# followed by "/" or the end of the text. Matching is case-insensitive.
RAW_URL_ANCHOR = re.compile(
    r"^(?:https?://|www\.|(?:[a-z0-9-]+\.)+(?:ai|app|co|com|dev|edu|gov|io|net|org)(?:/|$))",
    re.IGNORECASE,
)
# Inline Markdown link "[text](target)": group 1 is the (possibly empty)
# link text, group 2 is the non-empty target up to the first ")".
MARKDOWN_LINK = re.compile(r"\[([^\]]*)\]\(([^)]+)\)")
# A quoted title attribute (title="..." or title='...'): group 1 is the quote
# character, group 2 is the value. DOTALL lets the value span several lines.
VIDEO_EMBED_TITLE = re.compile(r"\btitle\s*=\s*([\"'])(.*?)\1", re.DOTALL)
| 155 | + |
122 | 156 | # Common bolded words that are NOT product terms (false positive suppression) |
123 | 157 | COMMON_BOLD_WORDS = { |
124 | 158 | # General emphasis words |
@@ -194,7 +228,7 @@ def find_changed_md_files() -> List[Path]: |
194 | 228 | for line in result.stdout.strip().split("\n"): |
195 | 229 | if (line.endswith(".md") or line.endswith(".mdx")) and os.path.exists(line): |
196 | 230 | p = Path(line) |
197 | | - if not any(part in EXCLUDED_DIRS for part in p.parts): |
| 231 | + if not any(part in EXCLUDED_DIRS for part in p.parts) and not p.is_relative_to(CHANGELOG_DIR): |
198 | 232 | files.append(p) |
199 | 233 | return sorted(files) |
200 | 234 | except subprocess.CalledProcessError: |
@@ -222,6 +256,104 @@ def check_frontmatter(content: str, filepath: str) -> List[Issue]: |
222 | 256 | return issues |
223 | 257 |
|
224 | 258 |
|
| 259 | +def _strip_markdown_formatting(text: str) -> str: |
| 260 | + """Remove lightweight Markdown/HTML formatting from link text.""" |
| 261 | + text = re.sub(r"`([^`]*)`", r"\1", text) |
| 262 | + text = re.sub(r"\*\*([^*]*)\*\*", r"\1", text) |
| 263 | + text = re.sub(r"__([^_]*)__", r"\1", text) |
| 264 | + text = re.sub(r"\*([^*]*)\*", r"\1", text) |
| 265 | + text = re.sub(r"_([^_]*)_", r"\1", text) |
| 266 | + text = re.sub(r"<[^>]+>", "", text) |
| 267 | + return text |
| 268 | + |
| 269 | + |
def _normalize_link_text(text: str) -> str:
    """Return a canonical form of link text for generic-anchor comparisons.

    Formatting markers are removed, whitespace runs collapse to one space,
    the result is lowercased, and surrounding whitespace, punctuation,
    brackets, and quotes are trimmed.
    """
    plain = _strip_markdown_formatting(text)
    collapsed = re.sub(r"\s+", " ", plain)
    lowered = collapsed.strip().lower()
    return lowered.strip(" \t\n\r.,:;!?()[]{}\"'")
| 275 | + |
| 276 | + |
| 277 | +def _meaningful_words(text: str) -> List[str]: |
| 278 | + """Return lowercase words that carry semantic meaning for comparisons.""" |
| 279 | + stopwords = {"a", "an", "and", "for", "in", "of", "on", "the", "to", "with", "x"} |
| 280 | + words = re.findall(r"[a-z0-9]+", text.lower()) |
| 281 | + return [word for word in words if len(word) > 2 and word not in stopwords] |
| 282 | + |
| 283 | + |
def check_link_quality(lines: List[str], filepath: str) -> List[Issue]:
    """Check Markdown links for descriptive, contextual anchor text.

    Lines inside ``` fenced code blocks are ignored. For every other line,
    issues are reported in this order:

    1. ``link-anchor`` for each non-image link whose text is empty (error),
       looks like a raw URL (warning), or is a generic phrase (warning).
    2. ``link-context`` (warning) for each "<verb> [<Name> page in the Oz web
       app]" link that lacks "the" before the link.
    3. ``link-context`` (warning) when a bullet of the form
       "- **Prefix**: [link text](target)" has link text that already
       contains every meaningful word of the bold prefix.

    Args:
        lines: File content split into lines.
        filepath: Path recorded on each reported issue.

    Returns:
        The issues found, with 1-based line numbers.
    """
    found: List[Issue] = []
    missing_article = re.compile(
        r"\b(go to|open|visit|navigate to|view)\s+\[((?:Runs|Integrations|Schedules|Environments|Secrets) page in the Oz web app)\]",
        re.IGNORECASE,
    )
    bold_prefixed_link = re.compile(
        r"^\s*[-*]\s+\*\*([^*]+)\*\*\s*[:—-]\s*\[([^\]]+)\]\([^)]+\)\.?\s*$"
    )

    inside_fence = False
    for line_no, raw in enumerate(lines, start=1):
        # A fence marker flips the state and is never checked itself.
        if raw.strip().startswith("```"):
            inside_fence = not inside_fence
            continue
        if inside_fence:
            continue

        # 1. Anchor-text quality of each link on the line.
        for link in MARKDOWN_LINK.finditer(raw):
            begin = link.start()
            if begin > 0 and raw[begin - 1] == "!":
                # "![alt](src)" is an image, not a link.
                continue

            anchor = link.group(1).strip()
            normalized = _normalize_link_text(anchor)
            if not normalized:
                found.append(Issue(
                    filepath, line_no, "link-anchor",
                    "Markdown link has empty anchor text; use descriptive link text that explains the destination",
                    "error",
                ))
            elif RAW_URL_ANCHOR.match(normalized):
                found.append(Issue(
                    filepath, line_no, "link-anchor",
                    f"Raw URL used as link text: \"{anchor}\". Name the destination instead.",
                    "warning",
                ))
            elif normalized in GENERIC_LINK_ANCHORS:
                found.append(Issue(
                    filepath, line_no, "link-anchor",
                    f"Generic link text: \"{anchor}\". Use descriptive anchor text that explains what users will find.",
                    "warning",
                ))

        # 2. Named destination pages should be introduced with "the".
        for hit in missing_article.finditer(raw):
            action, page_name = hit.group(1), hit.group(2)
            found.append(Issue(
                filepath, line_no, "link-context",
                f"Add \"the\" before named destination page: \"{action} the [{page_name}]\"",
                "warning",
            ))

        # 3. Bullet whose link text merely repeats its bold prefix.
        bullet = bold_prefixed_link.match(raw)
        if not bullet:
            continue
        prefix_words = _meaningful_words(_normalize_link_text(bullet.group(1)))
        anchor_words = set(_meaningful_words(_normalize_link_text(bullet.group(2))))
        if prefix_words and anchor_words.issuperset(prefix_words):
            found.append(Issue(
                filepath, line_no, "link-context",
                f"Link text repeats the bold prefix \"{bullet.group(1)}\". Remove the prefix or add distinct context.",
                "warning",
            ))

    return found
| 355 | + |
| 356 | + |
225 | 357 | def check_settings_paths(lines: List[str], filepath: str) -> List[Issue]: |
226 | 358 | """Detect backtick-wrapped Settings paths that should be bold per-segment.""" |
227 | 359 | issues = [] |
@@ -438,6 +570,81 @@ def check_screenshot_widths(lines: List[str], filepath: str) -> List[Issue]: |
438 | 570 | return issues |
439 | 571 |
|
440 | 572 |
|
| 573 | +def _iter_video_embed_tags(lines: List[str]) -> List[Tuple[int, str]]: |
| 574 | + """Return (line_number, tag_text) for VideoEmbed components.""" |
| 575 | + tags: List[Tuple[int, str]] = [] |
| 576 | + in_code_block = False |
| 577 | + collecting = False |
| 578 | + start_line = 0 |
| 579 | + tag_parts: List[str] = [] |
| 580 | + |
| 581 | + for i, line in enumerate(lines, 1): |
| 582 | + stripped = line.strip() |
| 583 | + if stripped.startswith("```"): |
| 584 | + in_code_block = not in_code_block |
| 585 | + continue |
| 586 | + if in_code_block: |
| 587 | + continue |
| 588 | + |
| 589 | + if not collecting and "<VideoEmbed" in line: |
| 590 | + collecting = True |
| 591 | + start_line = i |
| 592 | + tag_parts = [line] |
| 593 | + if ">" in line: |
| 594 | + tags.append((start_line, "\n".join(tag_parts))) |
| 595 | + collecting = False |
| 596 | + tag_parts = [] |
| 597 | + continue |
| 598 | + |
| 599 | + if collecting: |
| 600 | + tag_parts.append(line) |
| 601 | + if ">" in line: |
| 602 | + tags.append((start_line, "\n".join(tag_parts))) |
| 603 | + collecting = False |
| 604 | + tag_parts = [] |
| 605 | + |
| 606 | + if collecting and tag_parts: |
| 607 | + tags.append((start_line, "\n".join(tag_parts))) |
| 608 | + return tags |
| 609 | + |
| 610 | + |
def _is_generic_video_title(title: str) -> bool:
    """Return True when a VideoEmbed title is too generic for SEO/accessibility.

    The title is whitespace-collapsed, lowercased, and trimmed of surrounding
    punctuation. It counts as generic when it is listed in
    GENERIC_VIDEO_TITLES, contains a numbered placeholder such as "video 2"
    or "demo 3", or ends in " video" with at most three words overall.
    """
    cleaned = re.sub(r"\s+", " ", title).strip().lower()
    cleaned = cleaned.strip(" \t\n\r.,:;!?()[]{}\"'")

    listed = cleaned in GENERIC_VIDEO_TITLES
    numbered = re.search(r"\b(?:video|demo)\s+\d+\b", cleaned) is not None
    word_count = len(re.findall(r"[a-z0-9]+", cleaned))
    short_video_label = cleaned.endswith(" video") and word_count <= 3

    return listed or numbered or short_video_label
| 623 | + |
| 624 | + |
def check_video_embed_titles(lines: List[str], filepath: str) -> List[Issue]:
    """Check VideoEmbed components for specific title props.

    Each VideoEmbed tag yields at most one ``video-title`` issue, reported on
    the line where the tag starts: an error when no non-blank quoted title
    is found, otherwise a warning when the title is judged generic.
    """
    found: List[Issue] = []
    for line_number, tag in _iter_video_embed_tags(lines):
        title_attr = VIDEO_EMBED_TITLE.search(tag)
        # A missing attribute and a blank one are treated the same way.
        title = title_attr.group(2).strip() if title_attr else ""

        if not title:
            found.append(Issue(
                filepath, line_number, "video-title",
                "VideoEmbed missing title prop. Add a specific title that describes the integration, workflow, feature, or task shown.",
                "error",
            ))
        elif _is_generic_video_title(title):
            found.append(Issue(
                filepath, line_number, "video-title",
                f"Generic VideoEmbed title: \"{title}\". Use a specific title that describes what the video shows.",
                "warning",
            ))
    return found
| 646 | + |
| 647 | + |
441 | 648 | def check_callout_syntax(lines: List[str], filepath: str) -> List[Issue]: |
442 | 649 | """Check for malformed hint/callout syntax.""" |
443 | 650 | issues = [] |
@@ -664,9 +871,11 @@ def run_all_checks(filepath: Path) -> List[Issue]: |
664 | 871 | issues.extend(check_frontmatter(content, str(filepath))) |
665 | 872 | issues.extend(check_settings_paths(lines, str(filepath))) |
666 | 873 | issues.extend(check_ui_element_backticks(lines, str(filepath))) |
| 874 | + issues.extend(check_link_quality(lines, str(filepath))) |
667 | 875 | issues.extend(check_header_case(lines, str(filepath))) |
668 | 876 | issues.extend(check_image_alt_text(lines, str(filepath))) |
669 | 877 | issues.extend(check_screenshot_widths(lines, str(filepath))) |
| 878 | + issues.extend(check_video_embed_titles(lines, str(filepath))) |
670 | 879 | issues.extend(check_callout_syntax(lines, str(filepath))) |
671 | 880 | issues.extend(check_product_casing(lines, str(filepath))) |
672 | 881 | issues.extend(check_oz_terms(lines, str(filepath))) |
|
0 commit comments