@@ -84,15 +84,7 @@ def is_likely_external_link(url: str) -> bool:
84
84
]
85
85
86
86
# Check if the path contains any domain-like patterns
87
- for pattern in domain_patterns :
88
- if pattern in path :
89
- return True
90
-
91
- # Check for unusual path length which might indicate a malformed URL
92
- if len (path .split ("/" )) > 10 : # Unusually deep path
93
- return True
94
-
95
- return False
87
+ return any (pattern in path for pattern in domain_patterns )
96
88
97
89
98
90
def is_html_single_url (url : str ) -> bool :
@@ -125,22 +117,16 @@ def is_in_scope(url: str, base_url: str) -> bool:
125
117
# But also handle the case where the base URL doesn't include the actual page
126
118
if "/html-single/" in base_path :
127
119
# If this is a specific doc, make sure it's within that doc's URL path
128
- # Check if url_path starts with base_path or is the exact doc index
129
- is_in_scope = url_path .startswith (base_path ) or url_path == base_path .removesuffix ("/index" )
120
+ return url_path .startswith (base_path ) or url_path == base_path .removesuffix ("/index" )
130
121
else :
131
122
# For entire docs, anything with openshift_container_platform/VERSION in scope
132
123
try :
133
- base_parts = base_url .split ("openshift_container_platform/" )
134
- if len (base_parts ) > 1 :
135
- version = base_parts [1 ].split ("/" )[0 ]
136
- version_part = f"openshift_container_platform/{ version } "
137
- is_in_scope = version_part in url and is_html_single_url (url )
138
- else :
139
- is_in_scope = False
124
+ version_part = f"openshift_container_platform/{ base_url .split ('openshift_container_platform/' )[1 ].split ('/' )[0 ]} "
125
+ is_in_scope = version_part in url and is_html_single_url (url )
140
126
except IndexError :
141
127
is_in_scope = False
142
128
143
- return is_in_scope
129
+ return is_in_scope
144
130
145
131
146
132
def get_local_path (url : str , output_dir : Path ) -> Path :
0 commit comments