|
13 | 13 |
|
14 | 14 | class CSVScraperGraph(AbstractGraph): |
15 | 15 | """ |
16 | | - SmartScraper is a comprehensive web scraping tool that automates the process of extracting |
17 | | - information from web pages using a natural language model to interpret and answer prompts. |
| 16 | + A class representing a graph for extracting information from CSV files. |
| 17 | +
|
| 18 | + Attributes: |
| 19 | + prompt (str): The prompt used to generate an answer. |
| 20 | + source (str): The source of the data, which can be either a CSV |
| 21 | + file or a directory containing multiple CSV files. |
| 22 | + config (dict): Additional configuration parameters needed by some nodes in the graph. |
| 23 | +
|
| 24 | + Methods: |
| 25 | + __init__ (prompt: str, source: str, config: dict, schema: Optional[BaseModel] = None): |
| 26 | + Initializes the CSVScraperGraph with a prompt, source, and configuration. |
| 27 | +
|
| 28 | + __init__ initializes the CSVScraperGraph class. It requires the user's prompt as input, |
| 29 | + along with the source of the data (which can be either a single CSV file or a directory |
| 30 | + containing multiple CSV files), and any necessary configuration parameters. |
| 31 | +
|
| 32 | + Methods: |
| 33 | + _create_graph (): Creates the graph of nodes representing the workflow for CSV scraping. |
| 34 | +
|
| 35 | + _create_graph generates the CSV scraping process workflow |
| 36 | + represented by a directed acyclic graph. |
| 37 | + This method is used internally to create the scraping pipeline |
| 38 | + without having to execute it immediately. The result is a BaseGraph instance |
| 39 | + containing nodes that fetch and process data from a source, and other helper functions. |
| 40 | +
|
| 41 | + Methods: |
| 42 | + run () -> str: Executes the CSV scraping process and returns |
| 43 | + the answer to the prompt as a string. |
| 44 | + run runs the CSVScraperGraph class to extract information from a CSV file based |
| 45 | + on the user's prompt. It requires no additional arguments since all necessary data |
| 46 | + is stored within the class instance. The method fetches the relevant chunks of text, |
| 47 | + generates an answer based on these chunks, and returns this answer as a string. |
18 | 48 | """ |
19 | 49 |
|
20 | 50 | def __init__(self, prompt: str, source: str, config: dict, schema: Optional[BaseModel] = None): |
|
0 commit comments