diff --git a/examples/anthropic/pdf_scraper_graph_anthropic.py b/examples/anthropic/pdf_scraper_graph_anthropic.py deleted file mode 100644 index ee221ac6..00000000 --- a/examples/anthropic/pdf_scraper_graph_anthropic.py +++ /dev/null @@ -1,39 +0,0 @@ -""" -Module for showing how PDFScraper multi works -""" -import os, json -from dotenv import load_dotenv -from scrapegraphai.graphs import PDFScraperGraph - -load_dotenv() - -# ************************************************ -# Define the configuration for the graph -# ************************************************ - -graph_config = { - "llm": { - "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "anthropic/claude-3-haiku-20240307", - }, -} - -source = """ - The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian - circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. - Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante - from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. - Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood - through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided - by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, - the Beatrice of his earlier poetry, through the celestial spheres of Paradise. -""" - -pdf_scraper_graph = PDFScraperGraph( - prompt="Summarize the text and find the main topics", - source=source, - config=graph_config, -) -result = pdf_scraper_graph.run() - -print(json.dumps(result, indent=4)) diff --git a/examples/anthropic/pdf_scraper_multi_anthropic.py b/examples/anthropic/pdf_scraper_multi_anthropic.py deleted file mode 100644 index 2d117c35..00000000 --- a/examples/anthropic/pdf_scraper_multi_anthropic.py +++ /dev/null @@ -1,71 +0,0 @@ -""" -Module for showing how PDFScraper multi works -""" -import os -import json -from dotenv import load_dotenv -from scrapegraphai.graphs import PdfScraperMultiGraph - -load_dotenv() - -graph_config = { - "llm": { - "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "anthropic/claude-3-haiku-20240307", - }, -} - -# *************** -# Covert to list -# *************** - -sources = [ - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", -] - -prompt = """ -You are an expert in reviewing academic manuscripts. Please analyze the abstracts provided from an academic journal article to extract and clearly identify the following elements: - -Independent Variable (IV): The variable that is manipulated or considered as the primary cause affecting other variables. -Dependent Variable (DV): The variable that is measured or observed, which is expected to change as a result of variations in the Independent Variable. -Exogenous Shock: Identify any external or unexpected events used in the study that serve as a natural experiment or provide a unique setting for observing the effects on the IV and DV. -Response Format: For each abstract, present your response in the following structured format: - -Independent Variable (IV): -Dependent Variable (DV): -Exogenous Shock: - -Example Queries and Responses: - -Query: This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather the interaction between call center architecture and outdoor weather conditions in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking. - -Response: - -Independent Variable (IV): Employee happiness. -Dependent Variable (DV): Overall firm productivity. -Exogenous Shock: Sudden company-wide increase in bonus payments. - -Query: The diffusion of social media coincided with a worsening of mental health conditions among adolescents and young adults in the United States, giving rise to speculation that social media might be detrimental to mental health. In this paper, we provide quasi-experimental estimates of the impact of social media on mental health by leveraging a unique natural experiment: the staggered introduction of Facebook across U.S. colleges. Our analysis couples data on student mental health around the years of Facebook's expansion with a generalized difference-in-differences empirical strategy. We find that the roll-out of Facebook at a college increased symptoms of poor mental health, especially depression. We also find that, among students predicted to be most susceptible to mental illness, the introduction of Facebook led to increased utilization of mental healthcare services. Lastly, we find that, after the introduction of Facebook, students were more likely to report experiencing impairments to academic performance resulting from poor mental health. Additional evidence on mechanisms suggests that the results are due to Facebook fostering unfavorable social comparisons. - -Response: - -Independent Variable (IV): Exposure to social media. -Dependent Variable (DV): Mental health outcomes. -Exogenous Shock: staggered introduction of Facebook across U.S. colleges. -""" -# ******************************************************* -# Create the SmartScraperMultiGraph instance and run it -# ******************************************************* - -multiple_search_graph = PdfScraperMultiGraph( - prompt=prompt, - source= sources, - schema=None, - config=graph_config -) - -result = multiple_search_graph.run() -print(json.dumps(result, indent=4)) diff --git a/examples/azure/pdf_scraper_azure.py b/examples/azure/pdf_scraper_azure.py deleted file mode 100644 index 3a4e7e58..00000000 --- a/examples/azure/pdf_scraper_azure.py +++ /dev/null @@ -1,38 +0,0 @@ -import os -import json -from dotenv import load_dotenv -from scrapegraphai.graphs import PDFScraperGraph - -load_dotenv() - -# ************************************************ -# Define the configuration for the graph -# ************************************************ -graph_config = { - "llm": { - "api_key": os.environ["AZURE_OPENAI_KEY"], - "model": "azure_openai/gpt-4o" - }, - "verbose": True, - "headless": False -} - -source = """ - The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian - circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. - Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante - from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. - Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood - through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided - by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, - the Beatrice of his earlier poetry, through the celestial spheres of Paradise. -""" - -pdf_scraper_graph = PDFScraperGraph( - prompt="Summarize the text and find the main topics", - source=source, - config=graph_config, -) -result = pdf_scraper_graph.run() - -print(json.dumps(result, indent=4)) diff --git a/examples/bedrock/pdf_scraper_graph_bedrock.py b/examples/bedrock/pdf_scraper_graph_bedrock.py deleted file mode 100644 index ba51d351..00000000 --- a/examples/bedrock/pdf_scraper_graph_bedrock.py +++ /dev/null @@ -1,40 +0,0 @@ -""" -Basic example of scraping pipeline using SmartScraper -""" -import json -from dotenv import load_dotenv -from scrapegraphai.graphs import PDFScraperGraph - -load_dotenv() - -# ************************************************ -# Define the configuration for the graph -# ************************************************ - -graph_config = { - "llm": { - "client": "client_name", - "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0", - "temperature": 0.0 - } -} - -source = """ - The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian - circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. - Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante - from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. - Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood - through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided - by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, - the Beatrice of his earlier poetry, through the celestial spheres of Paradise. -""" - -pdf_scraper_graph = PDFScraperGraph( - prompt="Summarize the text and find the main topics", - source=source, - config=graph_config, -) -result = pdf_scraper_graph.run() - -print(json.dumps(result, indent=4)) diff --git a/examples/bedrock/pdf_scraper_graph_multi_bedrock.py b/examples/bedrock/pdf_scraper_graph_multi_bedrock.py deleted file mode 100644 index 45a71b74..00000000 --- a/examples/bedrock/pdf_scraper_graph_multi_bedrock.py +++ /dev/null @@ -1,67 +0,0 @@ -""" -Module for showing how PDFScraper multi works -""" -import json -from scrapegraphai.graphs import PdfScraperMultiGraph - -graph_config = { - "llm": { - "client": "client_name", - "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0", - "temperature": 0.0 - } -} -# *************** -# Covert to list -# *************** - -sources = [ - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", -] - -prompt = """ -You are an expert in reviewing academic manuscripts. Please analyze the abstracts provided from an academic journal article to extract and clearly identify the following elements: - -Independent Variable (IV): The variable that is manipulated or considered as the primary cause affecting other variables. -Dependent Variable (DV): The variable that is measured or observed, which is expected to change as a result of variations in the Independent Variable. -Exogenous Shock: Identify any external or unexpected events used in the study that serve as a natural experiment or provide a unique setting for observing the effects on the IV and DV. -Response Format: For each abstract, present your response in the following structured format: - -Independent Variable (IV): -Dependent Variable (DV): -Exogenous Shock: - -Example Queries and Responses: - -Query: This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather the interaction between call center architecture and outdoor weather conditions in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking. - -Response: - -Independent Variable (IV): Employee happiness. -Dependent Variable (DV): Overall firm productivity. -Exogenous Shock: Sudden company-wide increase in bonus payments. - -Query: The diffusion of social media coincided with a worsening of mental health conditions among adolescents and young adults in the United States, giving rise to speculation that social media might be detrimental to mental health. In this paper, we provide quasi-experimental estimates of the impact of social media on mental health by leveraging a unique natural experiment: the staggered introduction of Facebook across U.S. colleges. Our analysis couples data on student mental health around the years of Facebook's expansion with a generalized difference-in-differences empirical strategy. We find that the roll-out of Facebook at a college increased symptoms of poor mental health, especially depression. We also find that, among students predicted to be most susceptible to mental illness, the introduction of Facebook led to increased utilization of mental healthcare services. Lastly, we find that, after the introduction of Facebook, students were more likely to report experiencing impairments to academic performance resulting from poor mental health. Additional evidence on mechanisms suggests that the results are due to Facebook fostering unfavorable social comparisons. - -Response: - -Independent Variable (IV): Exposure to social media. -Dependent Variable (DV): Mental health outcomes. -Exogenous Shock: staggered introduction of Facebook across U.S. colleges. -""" -# ******************************************************* -# Create the SmartScraperMultiGraph instance and run it -# ******************************************************* - -multiple_search_graph = PdfScraperMultiGraph( - prompt=prompt, - source= sources, - schema=None, - config=graph_config -) - -result = multiple_search_graph.run() -print(json.dumps(result, indent=4)) diff --git a/examples/deepseek/pdf_scraper_graph_deepseek.py b/examples/deepseek/pdf_scraper_graph_deepseek.py deleted file mode 100644 index 5d313c2a..00000000 --- a/examples/deepseek/pdf_scraper_graph_deepseek.py +++ /dev/null @@ -1,44 +0,0 @@ -""" -Basic example of scraping pipeline using SmartScraper -""" -import os -import json -from dotenv import load_dotenv -from scrapegraphai.utils import prettify_exec_info -from scrapegraphai.graphs import PDFScraperGraph - -load_dotenv() - -# ************************************************ -# Define the configuration for the graph -# ************************************************ - -deepseek_key = os.getenv("DEEPSEEK_APIKEY") - -graph_config = { - "llm": { - "model": "deepseek/deepseek-chat", - "api_key": deepseek_key, - }, - "verbose": True, -} - -source = """ - The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian - circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. - Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante - from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. - Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood - through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided - by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, - the Beatrice of his earlier poetry, through the celestial spheres of Paradise. -""" - -pdf_scraper_graph = PDFScraperGraph( - prompt="Summarize the text and find the main topics", - source=source, - config=graph_config, -) -result = pdf_scraper_graph.run() - -print(json.dumps(result, indent=4)) diff --git a/examples/deepseek/pdf_scraper_multi_deepseek.py b/examples/deepseek/pdf_scraper_multi_deepseek.py deleted file mode 100644 index 59727a62..00000000 --- a/examples/deepseek/pdf_scraper_multi_deepseek.py +++ /dev/null @@ -1,74 +0,0 @@ -""" -Module for showing how PDFScraper multi works -""" -import os -import json -from dotenv import load_dotenv -from scrapegraphai.graphs import PdfScraperMultiGraph - -load_dotenv() - -deepseek_key = os.getenv("DEEPSEEK_APIKEY") - -graph_config = { - "llm": { - "model": "deepseek/deepseek-chat", - "api_key": deepseek_key, - }, - "verbose": True, -} - -# *************** -# Covert to list -# *************** - -sources = [ - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", -] - -prompt = """ -You are an expert in reviewing academic manuscripts. Please analyze the abstracts provided from an academic journal article to extract and clearly identify the following elements: - -Independent Variable (IV): The variable that is manipulated or considered as the primary cause affecting other variables. -Dependent Variable (DV): The variable that is measured or observed, which is expected to change as a result of variations in the Independent Variable. -Exogenous Shock: Identify any external or unexpected events used in the study that serve as a natural experiment or provide a unique setting for observing the effects on the IV and DV. -Response Format: For each abstract, present your response in the following structured format: - -Independent Variable (IV): -Dependent Variable (DV): -Exogenous Shock: - -Example Queries and Responses: - -Query: This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather the interaction between call center architecture and outdoor weather conditions in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking. - -Response: - -Independent Variable (IV): Employee happiness. -Dependent Variable (DV): Overall firm productivity. -Exogenous Shock: Sudden company-wide increase in bonus payments. - -Query: The diffusion of social media coincided with a worsening of mental health conditions among adolescents and young adults in the United States, giving rise to speculation that social media might be detrimental to mental health. In this paper, we provide quasi-experimental estimates of the impact of social media on mental health by leveraging a unique natural experiment: the staggered introduction of Facebook across U.S. colleges. Our analysis couples data on student mental health around the years of Facebook's expansion with a generalized difference-in-differences empirical strategy. We find that the roll-out of Facebook at a college increased symptoms of poor mental health, especially depression. We also find that, among students predicted to be most susceptible to mental illness, the introduction of Facebook led to increased utilization of mental healthcare services. Lastly, we find that, after the introduction of Facebook, students were more likely to report experiencing impairments to academic performance resulting from poor mental health. Additional evidence on mechanisms suggests that the results are due to Facebook fostering unfavorable social comparisons. - -Response: - -Independent Variable (IV): Exposure to social media. -Dependent Variable (DV): Mental health outcomes. -Exogenous Shock: staggered introduction of Facebook across U.S. colleges. -""" -# ******************************************************* -# Create the SmartScraperMultiGraph instance and run it -# ******************************************************* - -multiple_search_graph = PdfScraperMultiGraph( - prompt=prompt, - source= sources, - schema=None, - config=graph_config -) - -result = multiple_search_graph.run() -print(json.dumps(result, indent=4)) diff --git a/examples/ernie/pdf_scraper_graph_ernie.py b/examples/ernie/pdf_scraper_graph_ernie.py deleted file mode 100644 index 3bcf46ee..00000000 --- a/examples/ernie/pdf_scraper_graph_ernie.py +++ /dev/null @@ -1,38 +0,0 @@ -""" -pds scraper module -""" -import json -from scrapegraphai.graphs import PDFScraperGraph - -# ************************************************ -# Define the configuration for the graph -# ************************************************ - -graph_config = { - "llm": { - "model": "ernie/ernie-bot-turbo", - "ernie_client_id": "", - "ernie_client_secret": "", - "temperature": 0.1 - } -} - -source = """ - The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian - circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. - Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante - from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. - Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood - through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided - by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, - the Beatrice of his earlier poetry, through the celestial spheres of Paradise. -""" - -pdf_scraper_graph = PDFScraperGraph( - prompt="Summarize the text and find the main topics", - source=source, - config=graph_config -) -result = pdf_scraper_graph.run() - -print(json.dumps(result, indent=4)) diff --git a/examples/fireworks/pdf_scraper_fireworks.py b/examples/fireworks/pdf_scraper_fireworks.py deleted file mode 100644 index 182664d0..00000000 --- a/examples/fireworks/pdf_scraper_fireworks.py +++ /dev/null @@ -1,43 +0,0 @@ -""" -pdf_scraper module -""" -import os -import json -from dotenv import load_dotenv -from scrapegraphai.graphs import PDFScraperGraph - -load_dotenv() - -# ************************************************ -# Define the configuration for the graph -# ************************************************ - -fireworks_api_key = os.getenv("FIREWORKS_APIKEY") - -graph_config = { - "llm": { - "api_key": fireworks_api_key, - "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct" - }, - "verbose": True, -} - -source = """ - The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian - circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. - Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante - from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. - Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood - through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided - by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, - the Beatrice of his earlier poetry, through the celestial spheres of Paradise. -""" - -pdf_scraper_graph = PDFScraperGraph( - prompt="Summarize the text and find the main topics", - source=source, - config=graph_config, -) -result = pdf_scraper_graph.run() - -print(json.dumps(result, indent=4)) diff --git a/examples/fireworks/pdf_scraper_multi_fireworks.py b/examples/fireworks/pdf_scraper_multi_fireworks.py deleted file mode 100644 index c1077061..00000000 --- a/examples/fireworks/pdf_scraper_multi_fireworks.py +++ /dev/null @@ -1,64 +0,0 @@ -""" -Module for showing how PDFScraper multi works -""" -import os -import json -from typing import List -from dotenv import load_dotenv -from pydantic import BaseModel, Field -from scrapegraphai.graphs import PdfScraperMultiGraph - -load_dotenv() - -# ************************************************ -# Define the configuration for the graph -# ************************************************ - -fireworks_api_key = os.getenv("FIREWORKS_APIKEY") - -graph_config = { - "llm": { - "api_key": fireworks_api_key, - "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct" - }, - "verbose": True, -} - -# ************************************************ -# Define the output schema for the graph -# ************************************************ - -class Article(BaseModel): - independent_variable: str = Field(description="(IV): The variable that is manipulated or considered as the primary cause affecting other variables.") - dependent_variable: str = Field(description="(DV) The variable that is measured or observed, which is expected to change as a result of variations in the Independent Variable.") - exogenous_shock: str = Field(description="Identify any external or unexpected events used in the study that serve as a natural experiment or provide a unique setting for observing the effects on the IV and DV.") - -class Articles(BaseModel): - articles: List[Article] - -# ************************************************ -# Define the sources for the graph -# ************************************************ - -sources = [ - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather the interaction between call center architecture and outdoor weather conditions in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "The diffusion of social media coincided with a worsening of mental health conditions among adolescents and young adults in the United States, giving rise to speculation that social media might be detrimental to mental health. Our analysis couples data on student mental health around the years of Facebook's expansion with a generalized difference-in-differences empirical strategy. We find that the roll-out of Facebook at a college increased symptoms of poor mental health, especially depression. We also find that, among students predicted to be most susceptible to mental illness, the introduction of Facebook led to increased utilization of mental healthcare services. Lastly, we find that, after the introduction of Facebook, students were more likely to report experiencing impairments to academic performance resulting from poor mental health. Additional evidence on mechanisms suggests that the results are due to Facebook fostering unfavorable social comparisons." -] - -prompt = """ -Analyze the abstracts provided from an academic journal article to extract and clearly identify the Independent Variable (IV), Dependent Variable (DV), and Exogenous Shock. -""" - -# ******************************************************* -# Create the SmartScraperMultiGraph instance and run it -# ******************************************************* - -multiple_search_graph = PdfScraperMultiGraph( - prompt=prompt, - source= sources, - schema=Articles, - config=graph_config -) - -result = multiple_search_graph.run() -print(json.dumps(result, indent=4)) diff --git a/examples/google_genai/pdf_scraper_graph_gemini.py b/examples/google_genai/pdf_scraper_graph_gemini.py deleted file mode 100644 index 0af92709..00000000 --- a/examples/google_genai/pdf_scraper_graph_gemini.py +++ /dev/null @@ -1,43 +0,0 @@ -""" -Basic example of scraping pipeline using SmartScraper -""" -import os -import json -from dotenv import load_dotenv -from scrapegraphai.graphs import PDFScraperGraph - -load_dotenv() - -# ************************************************ -# Define the configuration for the graph -# ************************************************ - -gemini_key = os.getenv("GOOGLE_APIKEY") - -graph_config = { - "llm": { - "api_key": gemini_key, - "model": "google_genai/gemini-pro", - }, -} - - -source = """ - The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian - circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. - Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante - from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. - Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood - through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided - by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, - the Beatrice of his earlier poetry, through the celestial spheres of Paradise. -""" - -pdf_scraper_graph = PDFScraperGraph( - prompt="Summarize the text and find the main topics", - source=source, - config=graph_config, -) -result = pdf_scraper_graph.run() - -print(json.dumps(result, indent=4)) diff --git a/examples/google_genai/pdf_scraper_multi_gemini.py b/examples/google_genai/pdf_scraper_multi_gemini.py deleted file mode 100644 index 6a0faf86..00000000 --- a/examples/google_genai/pdf_scraper_multi_gemini.py +++ /dev/null @@ -1,74 +0,0 @@ -""" -Module for showing how PDFScraper multi works -""" -import os -import json -from dotenv import load_dotenv -from scrapegraphai.graphs import PdfScraperMultiGraph - -load_dotenv() - -gemini_key = os.getenv("GOOGLE_APIKEY") - -graph_config = { - "llm": { - "api_key": gemini_key, - "model": "google_genai/gemini-pro", - }, - "library": "beautifulsoup" -} - -# *************** -# Covert to list -# *************** - -sources = [ - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", -] - -prompt = """ -You are an expert in reviewing academic manuscripts. Please analyze the abstracts provided from an academic journal article to extract and clearly identify the following elements: - -Independent Variable (IV): The variable that is manipulated or considered as the primary cause affecting other variables. -Dependent Variable (DV): The variable that is measured or observed, which is expected to change as a result of variations in the Independent Variable. -Exogenous Shock: Identify any external or unexpected events used in the study that serve as a natural experiment or provide a unique setting for observing the effects on the IV and DV. -Response Format: For each abstract, present your response in the following structured format: - -Independent Variable (IV): -Dependent Variable (DV): -Exogenous Shock: - -Example Queries and Responses: - -Query: This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather the interaction between call center architecture and outdoor weather conditions in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking. - -Response: - -Independent Variable (IV): Employee happiness. -Dependent Variable (DV): Overall firm productivity. -Exogenous Shock: Sudden company-wide increase in bonus payments. - -Query: The diffusion of social media coincided with a worsening of mental health conditions among adolescents and young adults in the United States, giving rise to speculation that social media might be detrimental to mental health. In this paper, we provide quasi-experimental estimates of the impact of social media on mental health by leveraging a unique natural experiment: the staggered introduction of Facebook across U.S. colleges. Our analysis couples data on student mental health around the years of Facebook's expansion with a generalized difference-in-differences empirical strategy. We find that the roll-out of Facebook at a college increased symptoms of poor mental health, especially depression. We also find that, among students predicted to be most susceptible to mental illness, the introduction of Facebook led to increased utilization of mental healthcare services. Lastly, we find that, after the introduction of Facebook, students were more likely to report experiencing impairments to academic performance resulting from poor mental health. Additional evidence on mechanisms suggests that the results are due to Facebook fostering unfavorable social comparisons. - -Response: - -Independent Variable (IV): Exposure to social media. -Dependent Variable (DV): Mental health outcomes. -Exogenous Shock: staggered introduction of Facebook across U.S. colleges. -""" -# ******************************************************* -# Create the SmartScraperMultiGraph instance and run it -# ******************************************************* - -multiple_search_graph = PdfScraperMultiGraph( - prompt=prompt, - source= sources, - schema=None, - config=graph_config -) - -result = multiple_search_graph.run() -print(json.dumps(result, indent=4)) diff --git a/examples/google_vertexai/pdf_scraper_graph_gemini.py b/examples/google_vertexai/pdf_scraper_graph_gemini.py deleted file mode 100644 index 80af0ec8..00000000 --- a/examples/google_vertexai/pdf_scraper_graph_gemini.py +++ /dev/null @@ -1,45 +0,0 @@ -""" -Basic example of scraping pipeline using SmartScraper -""" - -import os, json -from dotenv import load_dotenv -from scrapegraphai.utils import prettify_exec_info -from scrapegraphai.graphs import PDFScraperGraph - -load_dotenv() - - -# ************************************************ -# Define the configuration for the graph -# ************************************************ - -gemini_key = os.getenv("GOOGLE_APIKEY") - -graph_config = { - "llm": { - "api_key": gemini_key, - "model": "google_vertexai/gemini-1.5-pro", - }, -} - - -source = """ - The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian - circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. - Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante - from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. - Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood - through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided - by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, - the Beatrice of his earlier poetry, through the celestial spheres of Paradise. -""" - -pdf_scraper_graph = PDFScraperGraph( - prompt="Summarize the text and find the main topics", - source=source, - config=graph_config, -) -result = pdf_scraper_graph.run() - -print(json.dumps(result, indent=4)) diff --git a/examples/google_vertexai/pdf_scraper_multi_gemini.py b/examples/google_vertexai/pdf_scraper_multi_gemini.py deleted file mode 100644 index fb6a46a7..00000000 --- a/examples/google_vertexai/pdf_scraper_multi_gemini.py +++ /dev/null @@ -1,74 +0,0 @@ -""" -Module for showing how PDFScraper multi works -""" -import os -import json -from dotenv import load_dotenv -from scrapegraphai.graphs import PdfScraperMultiGraph - -load_dotenv() - -gemini_key = os.getenv("GOOGLE_APIKEY") - -graph_config = { - "llm": { - "api_key": gemini_key, - "model": "google_vertexai/gemini-1.5-pro", - }, - "library": "beautifulsoup" -} - -# *************** -# Covert to list -# *************** - -sources = [ - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", -] - -prompt = """ -You are an expert in reviewing academic manuscripts. Please analyze the abstracts provided from an academic journal article to extract and clearly identify the following elements: - -Independent Variable (IV): The variable that is manipulated or considered as the primary cause affecting other variables. -Dependent Variable (DV): The variable that is measured or observed, which is expected to change as a result of variations in the Independent Variable. -Exogenous Shock: Identify any external or unexpected events used in the study that serve as a natural experiment or provide a unique setting for observing the effects on the IV and DV. -Response Format: For each abstract, present your response in the following structured format: - -Independent Variable (IV): -Dependent Variable (DV): -Exogenous Shock: - -Example Queries and Responses: - -Query: This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather the interaction between call center architecture and outdoor weather conditions in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking. - -Response: - -Independent Variable (IV): Employee happiness. -Dependent Variable (DV): Overall firm productivity. -Exogenous Shock: Sudden company-wide increase in bonus payments. - -Query: The diffusion of social media coincided with a worsening of mental health conditions among adolescents and young adults in the United States, giving rise to speculation that social media might be detrimental to mental health. In this paper, we provide quasi-experimental estimates of the impact of social media on mental health by leveraging a unique natural experiment: the staggered introduction of Facebook across U.S. colleges. Our analysis couples data on student mental health around the years of Facebook's expansion with a generalized difference-in-differences empirical strategy. We find that the roll-out of Facebook at a college increased symptoms of poor mental health, especially depression. We also find that, among students predicted to be most susceptible to mental illness, the introduction of Facebook led to increased utilization of mental healthcare services. Lastly, we find that, after the introduction of Facebook, students were more likely to report experiencing impairments to academic performance resulting from poor mental health. Additional evidence on mechanisms suggests that the results are due to Facebook fostering unfavorable social comparisons. - -Response: - -Independent Variable (IV): Exposure to social media. -Dependent Variable (DV): Mental health outcomes. -Exogenous Shock: staggered introduction of Facebook across U.S. colleges. -""" -# ******************************************************* -# Create the SmartScraperMultiGraph instance and run it -# ******************************************************* - -multiple_search_graph = PdfScraperMultiGraph( - prompt=prompt, - source= sources, - schema=None, - config=graph_config -) - -result = multiple_search_graph.run() -print(json.dumps(result, indent=4)) diff --git a/examples/groq/pdf_scraper_graph_groq.py b/examples/groq/pdf_scraper_graph_groq.py deleted file mode 100644 index 7cf18d33..00000000 --- a/examples/groq/pdf_scraper_graph_groq.py +++ /dev/null @@ -1,44 +0,0 @@ -""" -Example of pdf_scraper_graph -""" -import os -import json -from dotenv import load_dotenv -from scrapegraphai.graphs import PDFScraperGraph - -load_dotenv() - - -# ************************************************ -# Define the configuration for the graph -# ************************************************ -groq_key = os.getenv("GROQ_APIKEY") - -graph_config = { - "llm": { - "model": "groq/gemma-7b-it", - "api_key": groq_key, - "temperature": 0 - }, - "verbose": True, -} - -source = """ - The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian - circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. - Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante - from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. - Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood - through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided - by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, - the Beatrice of his earlier poetry, through the celestial spheres of Paradise. -""" - -pdf_scraper_graph = PDFScraperGraph( - prompt="Summarize the text and find the main topics", - source=source, - config=graph_config, -) -result = pdf_scraper_graph.run() - -print(json.dumps(result, indent=4)) diff --git a/examples/groq/pdf_scraper_multi_groq.py b/examples/groq/pdf_scraper_multi_groq.py deleted file mode 100644 index c43a7087..00000000 --- a/examples/groq/pdf_scraper_multi_groq.py +++ /dev/null @@ -1,74 +0,0 @@ -""" -Module for showing how PDFScraper multi works -""" -import os -import json -from dotenv import load_dotenv -from scrapegraphai.graphs import PdfScraperMultiGraph - -load_dotenv() -groq_key = os.getenv("GROQ_APIKEY") - -graph_config = { - "llm": { - "model": "groq/gemma-7b-it", - "api_key": groq_key, - "temperature": 0 - }, - "library": "beautifulsoup" -} - -# *************** -# Covert to list -# *************** - -sources = [ - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", -] - -prompt = """ -You are an expert in reviewing academic manuscripts. Please analyze the abstracts provided from an academic journal article to extract and clearly identify the following elements: - -Independent Variable (IV): The variable that is manipulated or considered as the primary cause affecting other variables. -Dependent Variable (DV): The variable that is measured or observed, which is expected to change as a result of variations in the Independent Variable. -Exogenous Shock: Identify any external or unexpected events used in the study that serve as a natural experiment or provide a unique setting for observing the effects on the IV and DV. -Response Format: For each abstract, present your response in the following structured format: - -Independent Variable (IV): -Dependent Variable (DV): -Exogenous Shock: - -Example Queries and Responses: - -Query: This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather the interaction between call center architecture and outdoor weather conditions in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking. - -Response: - -Independent Variable (IV): Employee happiness. -Dependent Variable (DV): Overall firm productivity. -Exogenous Shock: Sudden company-wide increase in bonus payments. - -Query: The diffusion of social media coincided with a worsening of mental health conditions among adolescents and young adults in the United States, giving rise to speculation that social media might be detrimental to mental health. In this paper, we provide quasi-experimental estimates of the impact of social media on mental health by leveraging a unique natural experiment: the staggered introduction of Facebook across U.S. colleges. Our analysis couples data on student mental health around the years of Facebook's expansion with a generalized difference-in-differences empirical strategy. We find that the roll-out of Facebook at a college increased symptoms of poor mental health, especially depression. We also find that, among students predicted to be most susceptible to mental illness, the introduction of Facebook led to increased utilization of mental healthcare services. Lastly, we find that, after the introduction of Facebook, students were more likely to report experiencing impairments to academic performance resulting from poor mental health. Additional evidence on mechanisms suggests that the results are due to Facebook fostering unfavorable social comparisons. - -Response: - -Independent Variable (IV): Exposure to social media. -Dependent Variable (DV): Mental health outcomes. -Exogenous Shock: staggered introduction of Facebook across U.S. colleges. -""" -# ******************************************************* -# Create the SmartScraperMultiGraph instance and run it -# ******************************************************* - -multiple_search_graph = PdfScraperMultiGraph( - prompt=prompt, - source= sources, - schema=None, - config=graph_config -) - -result = multiple_search_graph.run() -print(json.dumps(result, indent=4)) diff --git a/examples/huggingfacehub/pdf_scraper_graph_huggingfacehub.py b/examples/huggingfacehub/pdf_scraper_graph_huggingfacehub.py deleted file mode 100644 index eb0b1895..00000000 --- a/examples/huggingfacehub/pdf_scraper_graph_huggingfacehub.py +++ /dev/null @@ -1,48 +0,0 @@ -import os, json -from dotenv import load_dotenv -from scrapegraphai.graphs import PDFScraperGraph -from langchain_community.llms import HuggingFaceEndpoint -from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings - -load_dotenv() - - -# ************************************************ -# Define the configuration for the graph -# ************************************************ - -HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN') - -repo_id = "mistralai/Mistral-7B-Instruct-v0.2" - -llm_model_instance = HuggingFaceEndpoint( - repo_id=repo_id, max_length=128, temperature=0.5, token=HUGGINGFACEHUB_API_TOKEN -) - -embedder_model_instance = HuggingFaceInferenceAPIEmbeddings( - api_key=HUGGINGFACEHUB_API_TOKEN, model_name="sentence-transformers/all-MiniLM-l6-v2" -) - -graph_config = { - "llm": {"model_instance": llm_model_instance}, -} - -source = """ - The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian - circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. - Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante - from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. - Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood - through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided - by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, - the Beatrice of his earlier poetry, through the celestial spheres of Paradise. -""" - -pdf_scraper_graph = PDFScraperGraph( - prompt="Summarize the text and find the main topics", - source=source, - config=graph_config, -) -result = pdf_scraper_graph.run() - -print(json.dumps(result, indent=4)) diff --git a/examples/huggingfacehub/pdf_scraper_multi_huggingfacehub.py b/examples/huggingfacehub/pdf_scraper_multi_huggingfacehub.py deleted file mode 100644 index 4db809b2..00000000 --- a/examples/huggingfacehub/pdf_scraper_multi_huggingfacehub.py +++ /dev/null @@ -1,78 +0,0 @@ -""" -Module for showing how PDFScraper multi works -""" -import os -import json -from dotenv import load_dotenv -from scrapegraphai.graphs import PdfScraperMultiGraph -from langchain_community.llms import HuggingFaceEndpoint -from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings -load_dotenv() - -HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN') - -repo_id = "mistralai/Mistral-7B-Instruct-v0.2" - -llm_model_instance = HuggingFaceEndpoint( - repo_id=repo_id, max_length=128, temperature=0.5, token=HUGGINGFACEHUB_API_TOKEN -) - -embedder_model_instance = HuggingFaceInferenceAPIEmbeddings( - api_key=HUGGINGFACEHUB_API_TOKEN, model_name="sentence-transformers/all-MiniLM-l6-v2" -) - -graph_config = { - "llm": {"model_instance": llm_model_instance}, -} - -# Covert to list -sources = [ - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", -] - -prompt = """ -You are an expert in reviewing academic manuscripts. Please analyze the abstracts provided from an academic journal article to extract and clearly identify the following elements: - -Independent Variable (IV): The variable that is manipulated or considered as the primary cause affecting other variables. -Dependent Variable (DV): The variable that is measured or observed, which is expected to change as a result of variations in the Independent Variable. -Exogenous Shock: Identify any external or unexpected events used in the study that serve as a natural experiment or provide a unique setting for observing the effects on the IV and DV. -Response Format: For each abstract, present your response in the following structured format: - -Independent Variable (IV): -Dependent Variable (DV): -Exogenous Shock: - -Example Queries and Responses: - -Query: This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather the interaction between call center architecture and outdoor weather conditions in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking. - -Response: - -Independent Variable (IV): Employee happiness. -Dependent Variable (DV): Overall firm productivity. -Exogenous Shock: Sudden company-wide increase in bonus payments. - -Query: The diffusion of social media coincided with a worsening of mental health conditions among adolescents and young adults in the United States, giving rise to speculation that social media might be detrimental to mental health. In this paper, we provide quasi-experimental estimates of the impact of social media on mental health by leveraging a unique natural experiment: the staggered introduction of Facebook across U.S. colleges. Our analysis couples data on student mental health around the years of Facebook's expansion with a generalized difference-in-differences empirical strategy. We find that the roll-out of Facebook at a college increased symptoms of poor mental health, especially depression. We also find that, among students predicted to be most susceptible to mental illness, the introduction of Facebook led to increased utilization of mental healthcare services. Lastly, we find that, after the introduction of Facebook, students were more likely to report experiencing impairments to academic performance resulting from poor mental health. Additional evidence on mechanisms suggests that the results are due to Facebook fostering unfavorable social comparisons. - -Response: - -Independent Variable (IV): Exposure to social media. -Dependent Variable (DV): Mental health outcomes. -Exogenous Shock: staggered introduction of Facebook across U.S. colleges. -""" -# ******************************************************* -# Create the SmartScraperMultiGraph instance and run it -# ******************************************************* - -multiple_search_graph = PdfScraperMultiGraph( - prompt=prompt, - source= sources, - schema=None, - config=graph_config -) - -result = multiple_search_graph.run() -print(json.dumps(result, indent=4)) diff --git a/examples/local_models/pdf_scraper_multi_ollama.py b/examples/local_models/pdf_scraper_multi_ollama.py deleted file mode 100644 index ce258bf6..00000000 --- a/examples/local_models/pdf_scraper_multi_ollama.py +++ /dev/null @@ -1,67 +0,0 @@ -""" -Module for showing how PDFScraper multi works -""" -import json -from scrapegraphai.graphs import PdfScraperMultiGraph - -graph_config = { - "llm": { - "model": "ollama/llama3", - "temperature": 0, - "format": "json", # Ollama needs the format to be specified explicitly - "model_tokens": 4000, - }, - "verbose": True, -} - -# Covert to list -sources = [ - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", -] - -prompt = """ -You are an expert in reviewing academic manuscripts. Please analyze the abstracts provided from an academic journal article to extract and clearly identify the following elements: - -Independent Variable (IV): The variable that is manipulated or considered as the primary cause affecting other variables. -Dependent Variable (DV): The variable that is measured or observed, which is expected to change as a result of variations in the Independent Variable. -Exogenous Shock: Identify any external or unexpected events used in the study that serve as a natural experiment or provide a unique setting for observing the effects on the IV and DV. -Response Format: For each abstract, present your response in the following structured format: - -Independent Variable (IV): -Dependent Variable (DV): -Exogenous Shock: - -Example Queries and Responses: - -Query: This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather the interaction between call center architecture and outdoor weather conditions in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking. - -Response: - -Independent Variable (IV): Employee happiness. -Dependent Variable (DV): Overall firm productivity. -Exogenous Shock: Sudden company-wide increase in bonus payments. - -Query: The diffusion of social media coincided with a worsening of mental health conditions among adolescents and young adults in the United States, giving rise to speculation that social media might be detrimental to mental health. In this paper, we provide quasi-experimental estimates of the impact of social media on mental health by leveraging a unique natural experiment: the staggered introduction of Facebook across U.S. colleges. Our analysis couples data on student mental health around the years of Facebook's expansion with a generalized difference-in-differences empirical strategy. We find that the roll-out of Facebook at a college increased symptoms of poor mental health, especially depression. We also find that, among students predicted to be most susceptible to mental illness, the introduction of Facebook led to increased utilization of mental healthcare services. Lastly, we find that, after the introduction of Facebook, students were more likely to report experiencing impairments to academic performance resulting from poor mental health. Additional evidence on mechanisms suggests that the results are due to Facebook fostering unfavorable social comparisons. - -Response: - -Independent Variable (IV): Exposure to social media. -Dependent Variable (DV): Mental health outcomes. -Exogenous Shock: staggered introduction of Facebook across U.S. colleges. -""" -# ******************************************************* -# Create the SmartScraperMultiGraph instance and run it -# ******************************************************* - -multiple_search_graph = PdfScraperMultiGraph( - prompt=prompt, - source= sources, - schema=None, - config=graph_config -) - -result = multiple_search_graph.run() -print(json.dumps(result, indent=4)) diff --git a/examples/local_models/pdf_scraper_ollama.py b/examples/local_models/pdf_scraper_ollama.py deleted file mode 100644 index 84eb40f9..00000000 --- a/examples/local_models/pdf_scraper_ollama.py +++ /dev/null @@ -1,63 +0,0 @@ -""" -Module for showing how PDFScraper works -""" -from scrapegraphai.graphs import PDFScraperGraph - -graph_config = { - "llm": { - "model": "ollama/mistral", - "temperature": 0, - "format": "json", # Ollama needs the format to be specified explicitly - "model_tokens": 4000, - }, - "verbose": True, - "headless": False, -} - -# Covert to list -sources = [ - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - # Add more sources here -] - -prompt = """ -You are an expert in reviewing academic manuscripts. Please analyze the abstracts provided from an academic journal article to extract and clearly identify the following elements: - -Independent Variable (IV): The variable that is manipulated or considered as the primary cause affecting other variables. -Dependent Variable (DV): The variable that is measured or observed, which is expected to change as a result of variations in the Independent Variable. -Exogenous Shock: Identify any external or unexpected events used in the study that serve as a natural experiment or provide a unique setting for observing the effects on the IV and DV. -Response Format: For each abstract, present your response in the following structured format: - -Independent Variable (IV): -Dependent Variable (DV): -Exogenous Shock: - -Example Queries and Responses: - -Query: This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather the interaction between call center architecture and outdoor weather conditions in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking. - -Response: - -Independent Variable (IV): Employee happiness. -Dependent Variable (DV): Overall firm productivity. -Exogenous Shock: Sudden company-wide increase in bonus payments. - -Query: The diffusion of social media coincided with a worsening of mental health conditions among adolescents and young adults in the United States, giving rise to speculation that social media might be detrimental to mental health. In this paper, we provide quasi-experimental estimates of the impact of social media on mental health by leveraging a unique natural experiment: the staggered introduction of Facebook across U.S. colleges. Our analysis couples data on student mental health around the years of Facebook's expansion with a generalized difference-in-differences empirical strategy. We find that the roll-out of Facebook at a college increased symptoms of poor mental health, especially depression. We also find that, among students predicted to be most susceptible to mental illness, the introduction of Facebook led to increased utilization of mental healthcare services. Lastly, we find that, after the introduction of Facebook, students were more likely to report experiencing impairments to academic performance resulting from poor mental health. Additional evidence on mechanisms suggests that the results are due to Facebook fostering unfavorable social comparisons. - -Response: - -Independent Variable (IV): Exposure to social media. -Dependent Variable (DV): Mental health outcomes. -Exogenous Shock: staggered introduction of Facebook across U.S. colleges. -""" -results = [] -for source in sources: - pdf_scraper_graph = PDFScraperGraph( - prompt=prompt, - source=source, - config=graph_config - ) - result = pdf_scraper_graph.run() - results.append(result) - -print(results) diff --git a/examples/mistral/md_scraper_mistral.py b/examples/mistral/md_scraper_mistral.py index ee3604a7..135f08ba 100644 --- a/examples/mistral/md_scraper_mistral.py +++ b/examples/mistral/md_scraper_mistral.py @@ -1,9 +1,9 @@ """ -Basic example of scraping pipeline using MDScraperGraph from MD documents +Basic example of scraping pipeline using DocumentScraperGraph from MD documents """ import os from dotenv import load_dotenv -from scrapegraphai.graphs import MDScraperGraph +from scrapegraphai.graphs import DocumentScraperGraph from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info load_dotenv() @@ -33,10 +33,10 @@ } # ************************************************ -# Create the MDScraperGraph instance and run it +# Create the DocumentScraperGraph instance and run it # ************************************************ -md_scraper_graph = MDScraperGraph( +md_scraper_graph = DocumentScraperGraph( prompt="List me all the authors, title and genres of the books", source=text, # Pass the content of the file, not the file object config=graph_config diff --git a/examples/mistral/pdf_scraper_mistral.py b/examples/mistral/pdf_scraper_mistral.py deleted file mode 100644 index 7a36fc0c..00000000 --- a/examples/mistral/pdf_scraper_mistral.py +++ /dev/null @@ -1,43 +0,0 @@ -""" -pdf_scraper_mistral module -""" -import os -import json -from dotenv import load_dotenv -from scrapegraphai.graphs import PDFScraperGraph - -load_dotenv() - -# ************************************************ -# Define the configuration for the graph -# ************************************************ - -mistral_key = os.getenv("MISTRAL_API_KEY") - -graph_config = { - "llm": { - "api_key": mistral_key, - "model": "mistralai/open-mistral-nemo", - }, - "verbose": True, -} - -source = """ - The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian - circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. - Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante - from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. - Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood - through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided - by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, - the Beatrice of his earlier poetry, through the celestial spheres of Paradise. -""" - -pdf_scraper_graph = PDFScraperGraph( - prompt="Summarize the text and find the main topics", - source=source, - config=graph_config, -) -result = pdf_scraper_graph.run() - -print(json.dumps(result, indent=4)) diff --git a/examples/mistral/pdf_scraper_multi_mistral.py b/examples/mistral/pdf_scraper_multi_mistral.py deleted file mode 100644 index e9f1613f..00000000 --- a/examples/mistral/pdf_scraper_multi_mistral.py +++ /dev/null @@ -1,64 +0,0 @@ -""" -Module for showing how PDFScraper multi works -""" -import os -import json -from typing import List -from dotenv import load_dotenv -from pydantic import BaseModel, Field -from scrapegraphai.graphs import PdfScraperMultiGraph - -load_dotenv() - -mistral_key = os.getenv("MISTRAL_API_KEY") - -# ************************************************ -# Define the configuration for the graph -# ************************************************ - -graph_config = { - "llm": { - "api_key": mistral_key, - "model": "mistralai/open-mistral-nemo", - }, - "verbose": True, -} - -# ************************************************ -# Define the output schema for the graph -# ************************************************ - -class Article(BaseModel): - independent_variable: str = Field(description="(IV): The variable that is manipulated or considered as the primary cause affecting other variables.") - dependent_variable: str = Field(description="(DV) The variable that is measured or observed, which is expected to change as a result of variations in the Independent Variable.") - exogenous_shock: str = Field(description="Identify any external or unexpected events used in the study that serve as a natural experiment or provide a unique setting for observing the effects on the IV and DV.") - -class Articles(BaseModel): - articles: List[Article] - -# ************************************************ -# Define the sources for the graph -# ************************************************ - -sources = [ - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather the interaction between call center architecture and outdoor weather conditions in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "The diffusion of social media coincided with a worsening of mental health conditions among adolescents and young adults in the United States, giving rise to speculation that social media might be detrimental to mental health. Our analysis couples data on student mental health around the years of Facebook's expansion with a generalized difference-in-differences empirical strategy. We find that the roll-out of Facebook at a college increased symptoms of poor mental health, especially depression. We also find that, among students predicted to be most susceptible to mental illness, the introduction of Facebook led to increased utilization of mental healthcare services. Lastly, we find that, after the introduction of Facebook, students were more likely to report experiencing impairments to academic performance resulting from poor mental health. Additional evidence on mechanisms suggests that the results are due to Facebook fostering unfavorable social comparisons." -] - -prompt = """ -Analyze the abstracts provided from an academic journal article to extract and clearly identify the Independent Variable (IV), Dependent Variable (DV), and Exogenous Shock. -""" - -# ******************************************************* -# Create the SmartScraperMultiGraph instance and run it -# ******************************************************* - -multiple_search_graph = PdfScraperMultiGraph( - prompt=prompt, - source= sources, - schema=Articles, - config=graph_config -) - -result = multiple_search_graph.run() -print(json.dumps(result, indent=4)) diff --git a/examples/nemotron/md_scraper_nemotron.py b/examples/nemotron/md_scraper_nemotron.py index 1748f8cf..8e925c03 100644 --- a/examples/nemotron/md_scraper_nemotron.py +++ b/examples/nemotron/md_scraper_nemotron.py @@ -1,9 +1,9 @@ """ -Basic example of scraping pipeline using MDScraperGraph from XML documents +Basic example of scraping pipeline using DocumentScraperGraph from XML documents """ import os from dotenv import load_dotenv -from scrapegraphai.graphs import MDScraperGraph +from scrapegraphai.graphs import DocumentScraperGraph from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info load_dotenv() @@ -33,10 +33,10 @@ } # ************************************************ -# Create the MDScraperGraph instance and run it +# Create the DocumentScraperGraph instance and run it # ************************************************ -md_scraper_graph = MDScraperGraph( +md_scraper_graph = DocumentScraperGraph( prompt="List me all the authors, title and genres of the books", source=text, # Pass the content of the file, not the file object config=graph_config diff --git a/examples/nemotron/pdf_scraper_nemotron.py b/examples/nemotron/pdf_scraper_nemotron.py deleted file mode 100644 index 06b6364c..00000000 --- a/examples/nemotron/pdf_scraper_nemotron.py +++ /dev/null @@ -1,44 +0,0 @@ -""" -pdf_scraper nemotron -""" -import os -import json -from dotenv import load_dotenv -from scrapegraphai.graphs import PDFScraperGraph - -load_dotenv() - - -# ************************************************ -# Define the configuration for the graph -# ************************************************ - -nemotron_key = os.getenv("NEMOTRON_APIKEY") - -graph_config = { - "llm": { - "api_key": nemotron_key, - "model": "nvidia/meta/llama3-70b-instruct", - }, - "verbose": True, -} - -source = """ - The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian - circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. - Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante - from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. - Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood - through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided - by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, - the Beatrice of his earlier poetry, through the celestial spheres of Paradise. -""" - -pdf_scraper_graph = PDFScraperGraph( - prompt="Summarize the text and find the main topics", - source=source, - config=graph_config, -) -result = pdf_scraper_graph.run() - -print(json.dumps(result, indent=4)) diff --git a/examples/oneapi/pdf_scraper_graph_oneapi.py b/examples/oneapi/pdf_scraper_graph_oneapi.py deleted file mode 100644 index 4e9214b0..00000000 --- a/examples/oneapi/pdf_scraper_graph_oneapi.py +++ /dev/null @@ -1,37 +0,0 @@ -""" -pdf_scraper_oneapi module -""" -import json -from scrapegraphai.graphs import PDFScraperGraph - -# ************************************************ -# Define the configuration for the graph -# ************************************************ - -graph_config = { - "llm": { - "api_key": "***************************", - "model": "oneapi/qwen-turbo", - "base_url": "http://127.0.0.1:3000/v1", # 设置 OneAPI URL - } -} - -source = """ - The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian - circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. - Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante - from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. - Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood - through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided - by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, - the Beatrice of his earlier poetry, through the celestial spheres of Paradise. -""" - -pdf_scraper_graph = PDFScraperGraph( - prompt="Summarize the text and find the main topics", - source=source, - config=graph_config, -) -result = pdf_scraper_graph.run() - -print(json.dumps(result, indent=4)) diff --git a/examples/oneapi/pdf_scraper_multi_oneapi.py b/examples/oneapi/pdf_scraper_multi_oneapi.py deleted file mode 100644 index 7d0ce231..00000000 --- a/examples/oneapi/pdf_scraper_multi_oneapi.py +++ /dev/null @@ -1,70 +0,0 @@ -""" -Module for showing how PDFScraper multi works -""" -import os -import json -from dotenv import load_dotenv -from scrapegraphai.graphs import PdfScraperMultiGraph - -load_dotenv() - -openai_key = os.getenv("OPENAI_APIKEY") - -graph_config = { - "llm": { - "api_key": openai_key, - "model": "openai/gpt-3.5-turbo", - }, -} - -# Covert to list -sources = [ - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", -] - -prompt = """ -You are an expert in reviewing academic manuscripts. Please analyze the abstracts provided from an academic journal article to extract and clearly identify the following elements: - -Independent Variable (IV): The variable that is manipulated or considered as the primary cause affecting other variables. -Dependent Variable (DV): The variable that is measured or observed, which is expected to change as a result of variations in the Independent Variable. -Exogenous Shock: Identify any external or unexpected events used in the study that serve as a natural experiment or provide a unique setting for observing the effects on the IV and DV. -Response Format: For each abstract, present your response in the following structured format: - -Independent Variable (IV): -Dependent Variable (DV): -Exogenous Shock: - -Example Queries and Responses: - -Query: This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather the interaction between call center architecture and outdoor weather conditions in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking. - -Response: - -Independent Variable (IV): Employee happiness. -Dependent Variable (DV): Overall firm productivity. -Exogenous Shock: Sudden company-wide increase in bonus payments. - -Query: The diffusion of social media coincided with a worsening of mental health conditions among adolescents and young adults in the United States, giving rise to speculation that social media might be detrimental to mental health. In this paper, we provide quasi-experimental estimates of the impact of social media on mental health by leveraging a unique natural experiment: the staggered introduction of Facebook across U.S. colleges. Our analysis couples data on student mental health around the years of Facebook's expansion with a generalized difference-in-differences empirical strategy. We find that the roll-out of Facebook at a college increased symptoms of poor mental health, especially depression. We also find that, among students predicted to be most susceptible to mental illness, the introduction of Facebook led to increased utilization of mental healthcare services. Lastly, we find that, after the introduction of Facebook, students were more likely to report experiencing impairments to academic performance resulting from poor mental health. Additional evidence on mechanisms suggests that the results are due to Facebook fostering unfavorable social comparisons. - -Response: - -Independent Variable (IV): Exposure to social media. -Dependent Variable (DV): Mental health outcomes. -Exogenous Shock: staggered introduction of Facebook across U.S. colleges. -""" -# ******************************************************* -# Create the SmartScraperMultiGraph instance and run it -# ******************************************************* - -multiple_search_graph = PdfScraperMultiGraph( - prompt=prompt, - source= sources, - schema=None, - config=graph_config -) - -result = multiple_search_graph.run() -print(json.dumps(result, indent=4)) diff --git a/examples/openai/md_scraper_openai.py b/examples/openai/md_scraper_openai.py index 76775015..3a152243 100644 --- a/examples/openai/md_scraper_openai.py +++ b/examples/openai/md_scraper_openai.py @@ -1,9 +1,9 @@ """ -Basic example of scraping pipeline using MDScraperGraph from MD documents +Basic example of scraping pipeline using DocumentScraperGraph from MD documents """ import os from dotenv import load_dotenv -from scrapegraphai.graphs import MDScraperGraph +from scrapegraphai.graphs import DocumentScraperGraph from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info load_dotenv() @@ -33,10 +33,10 @@ } # ************************************************ -# Create the MDScraperGraph instance and run it +# Create the DocumentScraperGraph instance and run it # ************************************************ -md_scraper_graph = MDScraperGraph( +md_scraper_graph = DocumentScraperGraph( prompt="List me all the authors, title and genres of the books", source=text, # Pass the content of the file, not the file object config=graph_config diff --git a/examples/openai/pdf_scraper_multi_openai.py b/examples/openai/pdf_scraper_multi_openai.py deleted file mode 100644 index 91e219e3..00000000 --- a/examples/openai/pdf_scraper_multi_openai.py +++ /dev/null @@ -1,64 +0,0 @@ -""" -Module for showing how PDFScraper multi works -""" -import os -import json -from typing import List -from dotenv import load_dotenv -from pydantic import BaseModel, Field -from scrapegraphai.graphs import PdfScraperMultiGraph - -load_dotenv() - -openai_key = os.getenv("OPENAI_APIKEY") - -# ************************************************ -# Define the configuration for the graph -# ************************************************ - -graph_config = { - "llm": { - "api_key": openai_key, - "model": "openai/gpt-4o", - }, - "verbose": True, -} - -# ************************************************ -# Define the output schema for the graph -# ************************************************ - -class Article(BaseModel): - independent_variable: str = Field(description="(IV): The variable that is manipulated or considered as the primary cause affecting other variables.") - dependent_variable: str = Field(description="(DV) The variable that is measured or observed, which is expected to change as a result of variations in the Independent Variable.") - exogenous_shock: str = Field(description="Identify any external or unexpected events used in the study that serve as a natural experiment or provide a unique setting for observing the effects on the IV and DV.") - -class Articles(BaseModel): - articles: List[Article] - -# ************************************************ -# Define the sources for the graph -# ************************************************ - -sources = [ - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather the interaction between call center architecture and outdoor weather conditions in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "The diffusion of social media coincided with a worsening of mental health conditions among adolescents and young adults in the United States, giving rise to speculation that social media might be detrimental to mental health. Our analysis couples data on student mental health around the years of Facebook's expansion with a generalized difference-in-differences empirical strategy. We find that the roll-out of Facebook at a college increased symptoms of poor mental health, especially depression. We also find that, among students predicted to be most susceptible to mental illness, the introduction of Facebook led to increased utilization of mental healthcare services. Lastly, we find that, after the introduction of Facebook, students were more likely to report experiencing impairments to academic performance resulting from poor mental health. Additional evidence on mechanisms suggests that the results are due to Facebook fostering unfavorable social comparisons." -] - -prompt = """ -Analyze the abstracts provided from an academic journal article to extract and clearly identify the Independent Variable (IV), Dependent Variable (DV), and Exogenous Shock. -""" - -# ******************************************************* -# Create the SmartScraperMultiGraph instance and run it -# ******************************************************* - -multiple_search_graph = PdfScraperMultiGraph( - prompt=prompt, - source= sources, - schema=Articles, - config=graph_config -) - -result = multiple_search_graph.run() -print(json.dumps(result, indent=4)) diff --git a/examples/openai/pdf_scraper_openai.py b/examples/openai/pdf_scraper_openai.py deleted file mode 100644 index f7b99130..00000000 --- a/examples/openai/pdf_scraper_openai.py +++ /dev/null @@ -1,43 +0,0 @@ -""" -pdf_scraper_openai example module -""" -import os -import json -from dotenv import load_dotenv -from scrapegraphai.graphs import PDFScraperGraph - -load_dotenv() - -# ************************************************ -# Define the configuration for the graph -# ************************************************ - -openai_key = os.getenv("OPENAI_APIKEY") - -graph_config = { - "llm": { - "api_key": openai_key, - "model": "openai/gpt-4o", - }, - "verbose": True, -} - -source = """ - The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian - circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. - Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante - from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. - Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood - through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided - by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, - the Beatrice of his earlier poetry, through the celestial spheres of Paradise. -""" - -pdf_scraper_graph = PDFScraperGraph( - prompt="Summarize the text and find the main topics", - source=source, - config=graph_config, -) -result = pdf_scraper_graph.run() - -print(json.dumps(result, indent=4)) diff --git a/examples/together/pdf_scraper_graph_together.py b/examples/together/pdf_scraper_graph_together.py deleted file mode 100644 index c0f121c2..00000000 --- a/examples/together/pdf_scraper_graph_together.py +++ /dev/null @@ -1,43 +0,0 @@ -""" -Basic example of scraping pipeline using SmartScraper -""" -import os -import json -from dotenv import load_dotenv -from scrapegraphai.graphs import PDFScraperGraph -load_dotenv() - - -# ************************************************ -# Define the configuration for the graph -# ************************************************ - -together_key = os.getenv("TOGETHER_APIKEY") - -graph_config = { - "llm": { - "model": "togetherai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - "api_key": together_key, - }, - "verbose": True, -} - -source = """ - The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian - circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature. - Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante - from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God. - Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood - through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided - by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love, - the Beatrice of his earlier poetry, through the celestial spheres of Paradise. -""" - -pdf_scraper_graph = PDFScraperGraph( - prompt="Summarize the text and find the main topics", - source=source, - config=graph_config, -) -result = pdf_scraper_graph.run() - -print(json.dumps(result, indent=4)) diff --git a/examples/together/pdf_scraper_multi_together.py b/examples/together/pdf_scraper_multi_together.py deleted file mode 100644 index a34b0337..00000000 --- a/examples/together/pdf_scraper_multi_together.py +++ /dev/null @@ -1,74 +0,0 @@ -""" -Module for showing how PDFScraper multi works -""" -import os -import json -from dotenv import load_dotenv -from scrapegraphai.graphs import PdfScraperMultiGraph - -load_dotenv() - -together_key = os.getenv("TOGETHER_APIKEY") - -graph_config = { - "llm": { - "model": "togetherai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - "api_key": together_key, - }, - "verbose": True, -} - -# *************** -# Covert to list -# *************** - -sources = [ - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", - "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.", -] - -prompt = """ -You are an expert in reviewing academic manuscripts. Please analyze the abstracts provided from an academic journal article to extract and clearly identify the following elements: - -Independent Variable (IV): The variable that is manipulated or considered as the primary cause affecting other variables. -Dependent Variable (DV): The variable that is measured or observed, which is expected to change as a result of variations in the Independent Variable. -Exogenous Shock: Identify any external or unexpected events used in the study that serve as a natural experiment or provide a unique setting for observing the effects on the IV and DV. -Response Format: For each abstract, present your response in the following structured format: - -Independent Variable (IV): -Dependent Variable (DV): -Exogenous Shock: - -Example Queries and Responses: - -Query: This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather the interaction between call center architecture and outdoor weather conditions in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking. - -Response: - -Independent Variable (IV): Employee happiness. -Dependent Variable (DV): Overall firm productivity. -Exogenous Shock: Sudden company-wide increase in bonus payments. - -Query: The diffusion of social media coincided with a worsening of mental health conditions among adolescents and young adults in the United States, giving rise to speculation that social media might be detrimental to mental health. In this paper, we provide quasi-experimental estimates of the impact of social media on mental health by leveraging a unique natural experiment: the staggered introduction of Facebook across U.S. colleges. Our analysis couples data on student mental health around the years of Facebook's expansion with a generalized difference-in-differences empirical strategy. We find that the roll-out of Facebook at a college increased symptoms of poor mental health, especially depression. We also find that, among students predicted to be most susceptible to mental illness, the introduction of Facebook led to increased utilization of mental healthcare services. Lastly, we find that, after the introduction of Facebook, students were more likely to report experiencing impairments to academic performance resulting from poor mental health. Additional evidence on mechanisms suggests that the results are due to Facebook fostering unfavorable social comparisons. - -Response: - -Independent Variable (IV): Exposure to social media. -Dependent Variable (DV): Mental health outcomes. -Exogenous Shock: staggered introduction of Facebook across U.S. colleges. -""" -# ******************************************************* -# Create the SmartScraperMultiGraph instance and run it -# ******************************************************* - -multiple_search_graph = PdfScraperMultiGraph( - prompt=prompt, - source= sources, - schema=None, - config=graph_config -) - -result = multiple_search_graph.run() -print(json.dumps(result, indent=4)) diff --git a/scrapegraphai/graphs/__init__.py b/scrapegraphai/graphs/__init__.py index b5ffcc47..5b217bc9 100644 --- a/scrapegraphai/graphs/__init__.py +++ b/scrapegraphai/graphs/__init__.py @@ -11,17 +11,15 @@ from .xml_scraper_graph import XMLScraperGraph from .json_scraper_graph import JSONScraperGraph from .csv_scraper_graph import CSVScraperGraph -from .pdf_scraper_graph import PDFScraperGraph from .omni_scraper_graph import OmniScraperGraph from .omni_search_graph import OmniSearchGraph from .smart_scraper_multi_graph import SmartScraperMultiGraph -from .pdf_scraper_multi_graph import PdfScraperMultiGraph from .json_scraper_multi_graph import JSONScraperMultiGraph from .csv_scraper_multi_graph import CSVScraperMultiGraph from .xml_scraper_multi_graph import XMLScraperMultiGraph from .script_creator_multi_graph import ScriptCreatorMultiGraph -from .markdown_scraper_graph import MDScraperGraph -from .markdown_scraper_multi_graph import MDScraperMultiGraph +from .document_scraper_graph import DocumentScraperGraph +from .document_scraper_multi_graph import DocumentScraperMultiGraph from .search_link_graph import SearchLinkGraph from .screenshot_scraper_graph import ScreenshotScraperGraph from .smart_scraper_multi_concat_graph import SmartScraperMultiConcatGraph diff --git a/scrapegraphai/graphs/markdown_scraper_graph.py b/scrapegraphai/graphs/document_scraper_graph.py similarity index 95% rename from scrapegraphai/graphs/markdown_scraper_graph.py rename to scrapegraphai/graphs/document_scraper_graph.py index ed3c6856..39e54f4a 100644 --- a/scrapegraphai/graphs/markdown_scraper_graph.py +++ b/scrapegraphai/graphs/document_scraper_graph.py @@ -8,9 +8,9 @@ from .abstract_graph import AbstractGraph from ..nodes import FetchNode, ParseNode, GenerateAnswerNode -class MDScraperGraph(AbstractGraph): +class DocumentScraperGraph(AbstractGraph): """ - MDScraperGraph is a scraping pipeline that automates the process of + DocumentScraperGraph is a scraping pipeline that automates the process of extracting information from web pages using a natural language model to interpret and answer prompts. @@ -32,7 +32,7 @@ class MDScraperGraph(AbstractGraph): schema (BaseModel): The schema for the graph output. Example: - >>> smart_scraper = MDScraperGraph( + >>> smart_scraper = DocumentScraperGraph( ... "List me all the attractions in Chioggia.", ... "https://en.wikipedia.org/wiki/Chioggia", ... {"llm": {"model": "openai/gpt-3.5-turbo"}} diff --git a/scrapegraphai/graphs/markdown_scraper_multi_graph.py b/scrapegraphai/graphs/document_scraper_multi_graph.py similarity index 90% rename from scrapegraphai/graphs/markdown_scraper_multi_graph.py rename to scrapegraphai/graphs/document_scraper_multi_graph.py index b6a13111..aabed189 100644 --- a/scrapegraphai/graphs/markdown_scraper_multi_graph.py +++ b/scrapegraphai/graphs/document_scraper_multi_graph.py @@ -1,21 +1,21 @@ """ -MDScraperMultiGraph Module +DocumentScraperMultiGraph Module """ from copy import deepcopy from typing import List, Optional from pydantic import BaseModel from .base_graph import BaseGraph from .abstract_graph import AbstractGraph -from .markdown_scraper_graph import MDScraperGraph +from .document_scraper_graph import DocumentScraperGraph from ..nodes import ( GraphIteratorNode, MergeAnswersNode ) from ..utils.copy import safe_deepcopy -class MDScraperMultiGraph(AbstractGraph): +class DocumentScraperMultiGraph(AbstractGraph): """ - MDScraperMultiGraph is a scraping pipeline that scrapes a list of URLs and + DocumentScraperMultiGraph is a scraping pipeline that scrapes a list of URLs and generates answers to a given prompt. It only requires a user prompt and a list of URLs. Attributes: @@ -33,7 +33,7 @@ class MDScraperMultiGraph(AbstractGraph): schema (Optional[BaseModel]): The schema for the graph output. Example: - >>> search_graph = MDScraperMultiGraph( + >>> search_graph = DocumentScraperMultiGraph( ... "What is Chioggia famous for?", ... ["http://example.com/page1", "http://example.com/page2"], ... {"llm_model": {"model": "openai/gpt-3.5-turbo"}} @@ -60,7 +60,7 @@ def _create_graph(self) -> BaseGraph: input="user_prompt & jsons", output=["results"], node_config={ - "graph_instance": MDScraperGraph, + "graph_instance": DocumentScraperGraph, "scraper_config": self.copy_config, }, schema=self.copy_schema diff --git a/scrapegraphai/graphs/pdf_scraper_graph.py b/scrapegraphai/graphs/pdf_scraper_graph.py deleted file mode 100644 index 65ede542..00000000 --- a/scrapegraphai/graphs/pdf_scraper_graph.py +++ /dev/null @@ -1,109 +0,0 @@ -""" -PDFScraperGraph Module -""" -from typing import Optional -from pydantic import BaseModel -from .base_graph import BaseGraph -from .abstract_graph import AbstractGraph -from ..nodes import ( - FetchNode, - ParseNode, - GenerateAnswerPDFNode -) - -class PDFScraperGraph(AbstractGraph): - """ - PDFScraperGraph is a scraping pipeline that extracts information from pdf files using a natural - language model to interpret and answer prompts. - - Attributes: - prompt (str): The prompt for the graph. - source (str): The source of the graph. - config (dict): Configuration parameters for the graph. - schema (BaseModel): The schema for the graph output. - llm_model: An instance of a language model client, configured for generating answers. - embedder_model: An instance of an embedding model client, - configured for generating embeddings. - verbose (bool): A flag indicating whether to show print statements during execution. - headless (bool): A flag indicating whether to run the graph in headless mode. - model_token (int): The token limit for the language model. - - Args: - prompt (str): The prompt for the graph. - source (str): The source of the graph. - config (dict): Configuration parameters for the graph. - schema (BaseModel): The schema for the graph output. - - Example: - >>> pdf_scraper = PDFScraperGraph( - ... "List me all the attractions in Chioggia.", - ... "data/chioggia.pdf", - ... {"llm": {"model": "openai/gpt-3.5-turbo"}} - ... ) - >>> result = pdf_scraper.run() - """ - - def __init__(self, prompt: str, source: str, config: dict, schema: Optional[BaseModel] = None): - super().__init__(prompt, config, source, schema) - - self.input_key = "pdf" if source.endswith("pdf") else "pdf_dir" - - def _create_graph(self) -> BaseGraph: - """ - Creates the graph of nodes representing the workflow for web scraping. - - Returns: - BaseGraph: A graph instance representing the web scraping workflow. - """ - - fetch_node = FetchNode( - input='pdf | pdf_dir', - output=["doc"], - ) - - parse_node = ParseNode( - input="doc", - output=["parsed_doc"], - node_config={ - "parse_html": False, - "chunk_size": self.model_token, - "llm_model": self.llm_model - } - ) - - generate_answer_node_pdf = GenerateAnswerPDFNode( - input="user_prompt & (relevant_chunks | doc)", - output=["answer"], - node_config={ - "llm_model": self.llm_model, - "additional_info": self.config.get("additional_info"), - "schema": self.schema - } - ) - - return BaseGraph( - nodes=[ - fetch_node, - parse_node, - generate_answer_node_pdf, - ], - edges=[ - (fetch_node, parse_node), - (parse_node, generate_answer_node_pdf) - ], - entry_point=fetch_node, - graph_name=self.__class__.__name__ - ) - - def run(self) -> str: - """ - Executes the web scraping process and returns the answer to the prompt. - - Returns: - str: The answer to the prompt. - """ - - inputs = {"user_prompt": self.prompt, self.input_key: self.source} - self.final_state, self.execution_info = self.graph.execute(inputs) - - return self.final_state.get("answer", "No answer found.") diff --git a/scrapegraphai/graphs/pdf_scraper_multi_graph.py b/scrapegraphai/graphs/pdf_scraper_multi_graph.py deleted file mode 100644 index e0c56341..00000000 --- a/scrapegraphai/graphs/pdf_scraper_multi_graph.py +++ /dev/null @@ -1,102 +0,0 @@ -""" -PdfScraperMultiGraph Module -""" -from copy import deepcopy -from typing import List, Optional -from pydantic import BaseModel -from .base_graph import BaseGraph -from .abstract_graph import AbstractGraph -from .pdf_scraper_graph import PDFScraperGraph -from ..nodes import ( - GraphIteratorNode, - MergeAnswersNode -) -from ..utils.copy import safe_deepcopy - -class PdfScraperMultiGraph(AbstractGraph): - """ - PdfScraperMultiGraph is a scraping pipeline that scrapes a - list of URLs and generates answers to a given prompt. - It only requires a user prompt and a list of URLs. - - Attributes: - prompt (str): The user prompt to search the internet. - llm_model (dict): The configuration for the language model. - embedder_model (dict): The configuration for the embedder model. - headless (bool): A flag to run the browser in headless mode. - verbose (bool): A flag to display the execution information. - model_token (int): The token limit for the language model. - - Args: - prompt (str): The user prompt to search the internet. - source (List[str]): The source of the graph. - config (dict): Configuration parameters for the graph. - schema (Optional[BaseModel]): The schema for the graph output. - - Example: - >>> search_graph = MultipleSearchGraph( - ... "What is Chioggia famous for?", - ... {"llm": {"model": "openai/gpt-3.5-turbo"}} - ... ) - >>> result = search_graph.run() - """ - - def __init__(self, prompt: str, source: List[str], - config: dict, schema: Optional[BaseModel] = None): - - self.copy_config = safe_deepcopy(config) - - self.copy_schema = deepcopy(schema) - - super().__init__(prompt, config, source, schema) - - def _create_graph(self) -> BaseGraph: - """ - Creates the graph of nodes representing the workflow for web scraping and searching. - - Returns: - BaseGraph: A graph instance representing the web scraping and searching workflow. - """ - - graph_iterator_node = GraphIteratorNode( - input="user_prompt & pdfs", - output=["results"], - node_config={ - "graph_instance": PDFScraperGraph, - "scraper_config": self.copy_config, - }, - schema=self.copy_schema - ) - - merge_answers_node = MergeAnswersNode( - input="user_prompt & results", - output=["answer"], - node_config={ - "llm_model": self.llm_model, - "schema": self.copy_schema - } - ) - - return BaseGraph( - nodes=[ - graph_iterator_node, - merge_answers_node, - ], - edges=[ - (graph_iterator_node, merge_answers_node), - ], - entry_point=graph_iterator_node, - graph_name=self.__class__.__name__ - ) - - def run(self) -> str: - """ - Executes the web scraping and searching process. - - Returns: - str: The answer to the prompt. - """ - inputs = {"user_prompt": self.prompt, "pdfs": self.source} - self.final_state, self.execution_info = self.graph.execute(inputs) - - return self.final_state.get("answer", "No answer found.")