|
9 | 9 | from langchain.prompts import PromptTemplate |
10 | 10 | from langchain.output_parsers import CommaSeparatedListOutputParser |
11 | 11 |
|
12 | | -from .base_node import BaseNode |
13 | 12 | from langchain.output_parsers import CommaSeparatedListOutputParser |
14 | 13 | from langchain.prompts import PromptTemplate |
15 | 14 | from langchain_community.document_loaders import AsyncChromiumLoader |
|
18 | 17 | from ..utils.logging import get_logger |
19 | 18 | from .base_node import BaseNode |
20 | 19 |
|
21 | | - |
22 | 20 | class RobotsNode(BaseNode): |
23 | 21 | """ |
24 | 22 | A node responsible for checking if a website is scrapeable or not based on the robots.txt file. |
@@ -48,13 +46,14 @@ def __init__( |
48 | 46 | output: List[str], |
49 | 47 | node_config: Optional[dict] = None, |
50 | 48 | node_name: str = "Robots", |
51 | | - |
52 | 49 | ): |
53 | 50 | super().__init__(node_name, "node", input, output, 1) |
54 | 51 |
|
55 | 52 | self.llm_model = node_config["llm_model"] |
56 | 53 |
|
57 | | - self.force_scraping = False if node_config is None else node_config.get("force_scraping", False) |
| 54 | + self.force_scraping = ( |
| 55 | + False if node_config is None else node_config.get("force_scraping", False) |
| 56 | + ) |
58 | 57 | self.verbose = ( |
59 | 58 | True if node_config is None else node_config.get("verbose", False) |
60 | 59 | ) |
@@ -111,14 +110,11 @@ def execute(self, state: dict) -> dict: |
111 | 110 | base_url = f"{parsed_url.scheme}://{parsed_url.netloc}" |
112 | 111 | loader = AsyncChromiumLoader(f"{base_url}/robots.txt") |
113 | 112 | document = loader.load() |
114 | | - if "ollama" in self.llm_model["model_name"]: |
115 | | - self.llm_model["model_name"] = self.llm_model["model_name"].split("/")[ |
116 | | - -1 |
117 | | - ] |
118 | | - model = self.llm_model["model_name"].split("/")[-1] |
119 | | - |
| 113 | + if "ollama" in self.llm_model.model_name: |
| 114 | + self.llm_model.model_name = self.llm_model.model_name.split("/")[-1] |
| 115 | + model = self.llm_model.model_name.split("/")[-1] |
120 | 116 | else: |
121 | | - model = self.llm_model["model_name"] |
| 117 | + model = self.llm_model.model_name |
122 | 118 | try: |
123 | 119 | agent = robots_dictionary[model] |
124 | 120 |
|
|
0 commit comments