Skip to content

Commit 9b3695d

Browse files
committed
Update search_link_node.py
1 parent c5a3f89 commit 9b3695d

File tree

1 file changed

+4
-3
lines changed

1 file changed

+4
-3
lines changed

scrapegraphai/nodes/search_link_node.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,8 @@
33
"""
44
from typing import List, Optional
55
import re
6-
from tqdm import tqdm
76
from urllib.parse import urlparse, parse_qs
7+
from tqdm import tqdm
88
from langchain.prompts import PromptTemplate
99
from langchain_core.output_parsers import JsonOutputParser
1010
from langchain_core.runnables import RunnableParallel
@@ -74,10 +74,11 @@ def _is_language_url(self, url):
7474
parsed_url = urlparse(url)
7575
query_params = parse_qs(parsed_url.query)
7676

77-
return any(indicator in parsed_url.path.lower() or indicator in query_params for indicator in lang_indicators)
77+
return any(indicator in parsed_url.path.lower() \
78+
or indicator in query_params for indicator in lang_indicators)
7879
def _is_potentially_irrelevant(self, url):
7980
if not self.filter_links:
80-
return False
81+
return False
8182

8283
irrelevant_keywords = self.filter_config.get("irrelevant_keywords", [])
8384
return any(keyword in url.lower() for keyword in irrelevant_keywords)

0 commit comments

Comments
 (0)