-
Notifications
You must be signed in to change notification settings - Fork 13
LlamaIndex integration #23
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
9ed8ac0
b278943
9fddb05
432edf6
c888847
82adf2a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| .env | ||
| .DS_Store | ||
| *.csv |
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| SGAI_API_KEY="your-api-key-here" |
Large diffs are not rendered by default.
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| SGAI_API_KEY="your-api-key-here" |
Large diffs are not rendered by default.
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| SGAI_API_KEY="your-api-key-here" |
Large diffs are not rendered by default.
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| SGAI_API_KEY="your-api-key-here" |
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. doesn't need to be changed |
Large diffs are not rendered by default.
Large diffs are not rendered by default.
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. do not remove |
This file was deleted.
|
Large diffs are not rendered by default.
|
Large diffs are not rendered by default.
|
Large diffs are not rendered by default.
|
Large diffs are not rendered by default.
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,39 @@ | ||
"""Example: scrape GitHub trending repositories with the ScrapeGraph tool.

Requires the SGAI_API_KEY environment variable to hold a ScrapeGraph API key.
"""
import os
from typing import List

from llama_index.tools.scrapegraph.base import ScrapegraphToolSpec
from pydantic import BaseModel, Field

# Initialize the ScrapegraphToolSpec
scrapegraph_tool = ScrapegraphToolSpec()


# Schema for a single repository.
# NOTE: documented with comments rather than a class docstring, because
# Pydantic copies a model docstring into the generated JSON schema.
class RepositorySchema(BaseModel):
    name: str = Field(description="Name of the repository (e.g., 'owner/repo')")
    description: str = Field(description="Description of the repository")
    stars: int = Field(description="Star count of the repository")
    forks: int = Field(description="Fork count of the repository")
    today_stars: int = Field(description="Stars gained today")
    language: str = Field(description="Programming language used")


# Schema for the full response: a list of repositories.
class ListRepositoriesSchema(BaseModel):
    repositories: List[RepositorySchema] = Field(description="List of GitHub trending repositories")


# Never commit API keys to source control: read the key from the environment
# and fail early with a clear message when it is missing.
api_key = os.getenv("SGAI_API_KEY")
if not api_key:
    raise SystemExit("Set the SGAI_API_KEY environment variable before running this example.")

# Make the API call to scrape GitHub trending repositories
response = scrapegraph_tool.scrapegraph_smartscraper(
    prompt="Extract information about trending GitHub repositories",
    url="https://github.com/trending",
    api_key=api_key,
    schema=ListRepositoriesSchema,
)

# Get the result and print each repository
result = response["result"]
print("\nTrending Repositories:")
for repo in result["repositories"]:
    print(f"\nRepository: {repo['name']}")
    print(f"Description: {repo['description']}")
    print(f"Stars: {repo['stars']}")
    print(f"Forks: {repo['forks']}")
    print(f"Today's Stars: {repo['today_stars']}")
    print(f"Language: {repo['language']}")
|
Large diffs are not rendered by default.
|
Large diffs are not rendered by default.
|
Large diffs are not rendered by default.
|
Large diffs are not rendered by default.
|
Large diffs are not rendered by default.
|
Large diffs are not rendered by default.
|
Large diffs are not rendered by default.
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
|
|
||
"""Example: extract founder details from scrapegraphai.com with the ScrapeGraph tool.

Requires the SGAI_API_KEY environment variable to hold a ScrapeGraph API key.
"""
import os

from llama_index.tools.scrapegraph.base import ScrapegraphToolSpec
from pydantic import BaseModel, Field

scrapegraph_tool = ScrapegraphToolSpec()


# Schema for a single founder.
# NOTE: documented with comments rather than a class docstring, because
# Pydantic copies a model docstring into the generated JSON schema.
class FounderSchema(BaseModel):
    name: str = Field(description="Name of the founder")
    role: str = Field(description="Role of the founder")
    social_media: str = Field(description="Social media URL of the founder")


# Schema for the full response: a list of founders.
class ListFoundersSchema(BaseModel):
    founders: list[FounderSchema] = Field(description="List of founders")


# Never commit API keys to source control: read the key from the environment
# and fail early with a clear message when it is missing.
api_key = os.getenv("SGAI_API_KEY")
if not api_key:
    raise SystemExit("Set the SGAI_API_KEY environment variable before running this example.")

response = scrapegraph_tool.scrapegraph_smartscraper(
    # The prompt asks for founders so that it matches ListFoundersSchema and
    # the result["founders"] access below.
    prompt="Extract information about the founders",
    url="https://scrapegraphai.com/",
    api_key=api_key,
    schema=ListFoundersSchema,
)

result = response["result"]

for founder in result["founders"]:
    print(founder)
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
"""Example: scrape Wired science news articles with the ScrapeGraph tool.

Requires the SGAI_API_KEY environment variable to hold a ScrapeGraph API key.
"""
import os
from typing import List

from llama_index.tools.scrapegraph.base import ScrapegraphToolSpec
from pydantic import BaseModel, Field

# Initialize the ScrapegraphToolSpec
scrapegraph_tool = ScrapegraphToolSpec()


# Schema for a single news item.
# NOTE: documented with comments rather than a class docstring, because
# Pydantic copies a model docstring into the generated JSON schema.
class NewsItemSchema(BaseModel):
    category: str = Field(description="Category of the news (e.g., 'Health', 'Environment')")
    title: str = Field(description="Title of the news article")
    link: str = Field(description="URL to the news article")
    author: str = Field(description="Author of the news article")


# Schema containing a list of news items
class ListNewsSchema(BaseModel):
    news: List[NewsItemSchema] = Field(description="List of news articles with their details")


# Never commit API keys to source control: read the key from the environment
# and fail early with a clear message when it is missing.
api_key = os.getenv("SGAI_API_KEY")
if not api_key:
    raise SystemExit("Set the SGAI_API_KEY environment variable before running this example.")

# Make the API call to scrape news articles
response = scrapegraph_tool.scrapegraph_smartscraper(
    prompt="Extract information about science news articles",
    url="https://www.wired.com/tag/science/",
    api_key=api_key,
    schema=ListNewsSchema,
)

# Get the result and print each news article
result = response["result"]
print("\nWired Science News Articles:")
for article in result["news"]:
    print(f"\nCategory: {article['category']}")
    print(f"Title: {article['title']}")
    print(f"Author: {article['author']}")
    print(f"Link: {article['link']}")

# Save to CSV (optional)
# pandas is imported here, next to its only use, so this section can be
# removed as a unit when CSV export is not wanted.
import pandas as pd

df = pd.DataFrame(result["news"])
df.to_csv("wired_news.csv", index=False)
print("\nData saved to wired_news.csv")
|
Large diffs are not rendered by default.
|
Large diffs are not rendered by default.
|
Large diffs are not rendered by default.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
to remove