Skip to content

Commit 5bda918

Browse files
committed
feat: add json multiscraper
1 parent 4d42d7b commit 5bda918

File tree

5 files changed

+15
-24
lines changed

5 files changed

+15
-24
lines changed

examples/local_models/json_scraper_multi_ollama.py

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
Module for showing how JSONScraper multi works
33
"""
44
import os
5-
from scrapegraphai.graphs import PdfScraperMultiGraph
5+
import json
6+
from scrapegraphai.graphs import JSONScraperMultiGraph
67

78
graph_config = {
89
"llm": {
@@ -25,23 +26,14 @@
2526
with open(file_path, 'r', encoding="utf-8") as file:
2627
text = file.read()
2728

28-
29-
json_scraper_graph = JSONScraperGraph(
30-
prompt="List me all the authors, title and genres of the books",
31-
source=text, # Pass the content of the file, not the file object
29+
sources = [text, text]
30+
31+
multiple_search_graph = JSONScraperMultiGraph(
32+
prompt= "List me all the authors, title and genres of the books",
33+
source= sources,
34+
schema=None,
3235
config=graph_config
3336
)
3437

35-
36-
37-
results = []
38-
for source in sources:
39-
pdf_scraper_graph = PdfScraperMultiGraph(
40-
prompt=prompt,
41-
source=source,
42-
config=graph_config
43-
)
44-
result = pdf_scraper_graph.run()
45-
results.append(result)
46-
47-
print(results)
38+
result = multiple_search_graph.run()
39+
print(json.dumps(result, indent=4))

examples/local_models/pdf_scraper_multi_ollama.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
"temperature": 0,
1717
},
1818
"verbose": True,
19-
"headless": False,
2019
}
2120

2221
# Convert to list

scrapegraphai/graphs/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,4 @@
1717
from .omni_search_graph import OmniSearchGraph
1818
from .smart_scraper_multi_graph import SmartScraperMultiGraph
1919
from .pdf_scraper_multi import PdfScraperMultiGraph
20-
from .json_scraper_multi import JsonScraperMultiGraph
20+
from .json_scraper_multi import JSONScraperMultiGraph

scrapegraphai/graphs/json_scraper_multi.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
JsonScraperMultiGraph Module
2+
JSONScraperMultiGraph Module
33
"""
44

55
from copy import copy, deepcopy
@@ -15,9 +15,9 @@
1515
)
1616

1717

18-
class JsonScraperMultiGraph(AbstractGraph):
18+
class JSONScraperMultiGraph(AbstractGraph):
1919
"""
20-
JsonScraperMultiGraph is a scraping pipeline that scrapes a list of URLs and generates answers to a given prompt.
20+
JSONScraperMultiGraph is a scraping pipeline that scrapes a list of URLs and generates answers to a given prompt.
2121
It only requires a user prompt and a list of URLs.
2222
2323
Attributes:

scrapegraphai/nodes/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,4 @@
1919
from .generate_answer_pdf_node import GenerateAnswerPDFNode
2020
from .graph_iterator_node import GraphIteratorNode
2121
from .merge_answers_node import MergeAnswersNode
22-
from .generate_answer_omni_node import GenerateAnswerOmniNode
22+
from .generate_answer_omni_node import GenerateAnswerOmniNode

0 commit comments

Comments
 (0)