
Commit 47645d9

removed unused files
1 parent 5211cbe commit 47645d9

23 files changed: +42 −169 lines changed

manual deployment/autorequirements.py

Lines changed: 0 additions & 30 deletions
This file was deleted.

manual deployment/commit_and_push.sh

Lines changed: 0 additions & 36 deletions
This file was deleted.

manual deployment/commit_and_push_with_tests.sh

Lines changed: 0 additions & 36 deletions
This file was deleted.

manual deployment/deploy_on_pip.sh

Lines changed: 0 additions & 15 deletions
This file was deleted.

manual deployment/installation.sh

Lines changed: 0 additions & 8 deletions
This file was deleted.

scrapegraphai/builders/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 """
-__init__.py file for builders folder
+__init__.py file for builders folder
 """
 
 from .graph_builder import GraphBuilder

scrapegraphai/builders/graph_builder.py

Lines changed: 0 additions & 1 deletion
@@ -6,7 +6,6 @@
 from langchain_community.chat_models import ErnieBotChat
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_openai import ChatOpenAI
-
 from ..helpers import nodes_metadata, graph_schema
 
 class GraphBuilder:

scrapegraphai/helpers/__init__.py

Lines changed: 0 additions & 1 deletion
@@ -1,7 +1,6 @@
 """
 __init__.py for the helpers folder
 """
-
 from .nodes_metadata import nodes_metadata
 from .schemas import graph_schema
 from .models_tokens import models_tokens

scrapegraphai/integrations/burr_bridge.py

Lines changed: 0 additions & 1 deletion
@@ -2,7 +2,6 @@
 Bridge class to integrate Burr into ScrapeGraphAI graphs
 [Burr](https://github.com/DAGWorks-Inc/burr)
 """
-
 import re
 import uuid
 from hashlib import md5

scrapegraphai/nodes/fetch_node_level_k.py

Lines changed: 14 additions & 10 deletions
@@ -1,11 +1,12 @@
+"""
+fetch_node_level_k module
+"""
 from typing import List, Optional
-from .base_node import BaseNode
-from ..docloaders import ChromiumLoader
-from ..utils.cleanup_html import cleanup_html
-from ..utils.convert_to_md import convert_to_md
+from urllib.parse import urljoin
 from langchain_core.documents import Document
 from bs4 import BeautifulSoup
-from urllib.parse import quote, urljoin
+from .base_node import BaseNode
+from ..docloaders import ChromiumLoader
 
 class FetchNodeLevelK(BaseNode):
     """
@@ -102,17 +103,18 @@ def fetch_content(self, source: str, loader_kwargs) -> Optional[str]:
             Optional[str]: The fetched HTML content or None if fetching failed.
         """
         self.logger.info(f"--- (Fetching HTML from: {source}) ---")
-
+
         if self.browser_base is not None:
             try:
                 from ..docloaders.browser_base import browser_base_fetch
             except ImportError:
                 raise ImportError("""The browserbase module is not installed.
                                   Please install it using `pip install browserbase`.""")
 
-            data = browser_base_fetch(self.browser_base.get("api_key"),
+            data = browser_base_fetch(self.browser_base.get("api_key"),
                                       self.browser_base.get("project_id"), [source])
-            document = [Document(page_content=content, metadata={"source": source}) for content in data]
+            document = [Document(page_content=content,
+                                 metadata={"source": source}) for content in data]
         else:
             loader = ChromiumLoader([source], headless=self.headless, **loader_kwargs)
             document = loader.load()
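`fetch_content` branches between a remote Browserbase fetch and a local `ChromiumLoader`, importing `browser_base_fetch` lazily so that `browserbase` stays an optional dependency. A minimal sketch of the same guarded-import pattern; `some_remote_backend`, `remote_fetch`, and `local_fetch` are hypothetical stand-ins, not the real ScrapeGraphAI or Browserbase API:

```python
from typing import List, Optional

def local_fetch(sources: List[str]) -> List[str]:
    # Stand-in for ChromiumLoader: pretend each page was fetched locally.
    return [f"<html><!-- fetched {s} locally --></html>" for s in sources]

def fetch_html(source: str, remote_config: Optional[dict] = None) -> List[str]:
    """Mirrors fetch_content's branching: use the remote backend when
    configured, fall back to the local loader otherwise."""
    if remote_config is not None:
        try:
            # Lazy import keeps the remote backend optional; users who never
            # set remote_config never need it installed.
            from some_remote_backend import remote_fetch  # hypothetical module
        except ImportError as exc:
            raise ImportError(
                "some_remote_backend is not installed; install it "
                "before enabling remote fetching.") from exc
        return remote_fetch(remote_config["api_key"],
                            remote_config["project_id"], [source])
    return local_fetch([source])

print(fetch_html("https://example.com")[0])
```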
@@ -179,7 +181,8 @@ def obtain_content(self, documents: List, loader_kwargs) -> List:
             full_links = self.get_full_links(source, links)
 
             for link in full_links:
-                if not any(d.get('source', '') == link for d in documents) and not any(d.get('source', '') == link for d in new_documents):
+                if not any(d.get('source', '') == link for d in documents) \
+                    and not any(d.get('source', '') == link for d in new_documents):
                     new_documents.append({"source": link})
 
         documents.extend(new_documents)
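The rewrapped condition above deduplicates links by scanning both `documents` and `new_documents` with `any()`, which is O(n) per link. For illustration only, the same check can be expressed with a set of already-seen sources, assuming deduplication on exact URL equality (`merge_new_links` is a hypothetical helper, not library code):

```python
def merge_new_links(documents: list, full_links: list) -> list:
    """Set-based equivalent of the diff's double-any() membership test."""
    seen = {d.get("source", "") for d in documents}
    new_documents = []
    for link in full_links:
        if link not in seen:
            new_documents.append({"source": link})
            seen.add(link)  # also skips duplicates within full_links itself
    documents.extend(new_documents)
    return documents

docs = [{"source": "https://example.com/a"}]
print(merge_new_links(docs, ["https://example.com/a", "https://example.com/b"]))
# [{'source': 'https://example.com/a'}, {'source': 'https://example.com/b'}]
```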
@@ -208,7 +211,8 @@ def process_links(self, base_url: str, links: list,
 
             if current_depth < depth:
                 new_links = self.extract_links(link_content)
-                content_dict.update(self.process_links(full_link, new_links, loader_kwargs, depth, current_depth + 1))
+                content_dict.update(self.process_links(full_link, new_links,
+                                                       loader_kwargs, depth, current_depth + 1))
             else:
                 self.logger.warning(f"Failed to fetch content for {full_link}")
         return content_dict
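`process_links` recurses on each page's links, incrementing `current_depth` until it reaches `depth`, so the crawl is bounded by the configured depth. A toy sketch of that recursion shape, with `get_links` standing in for the node's fetch-and-extract step:

```python
def crawl(url: str, get_links, depth: int, current_depth: int = 1) -> dict:
    """Depth-limited recursion in the shape of process_links."""
    content = {url: f"content of {url}"}  # placeholder for the fetched page
    if current_depth < depth:
        for link in get_links(url):
            content.update(crawl(link, get_links, depth, current_depth + 1))
    return content

# Toy link graph crawled to depth 2: children of "root" are visited,
# grandchildren ("a1", "b1") are not.
links = {"root": ["a", "b"], "a": ["a1"], "b": ["b1"]}
print(crawl("root", lambda u: links.get(u, []), depth=2))
# {'root': 'content of root', 'a': 'content of a', 'b': 'content of b'}
```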
