diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 8431e3e..0c362a8 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -6,11 +6,15 @@ name: Upload Python Package on: release: types: [published] + paths: + - 'scrapegraph-py/**' jobs: deploy: runs-on: ubuntu-latest + # Only run if scrapegraph-py has changes + if: contains(github.event.release.body, 'scrapegraph-py/') steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2f7a203..cb2e3a6 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -4,11 +4,15 @@ on: branches: - main - pre/* + paths: + - 'scrapegraph-py/**' jobs: build: name: Build runs-on: ubuntu-latest + # Only run if scrapegraph-py has changes + if: contains(github.event.head_commit.modified, 'scrapegraph-py/') || contains(github.event.head_commit.added, 'scrapegraph-py/') || contains(github.event.head_commit.removed, 'scrapegraph-py/') steps: - name: Install git run: | diff --git a/CHANGELOG.MD b/CHANGELOG.MD deleted file mode 100644 index c1e7188..0000000 --- a/CHANGELOG.MD +++ /dev/null @@ -1,6 +0,0 @@ -# Changelog - -All notable changes to the Scrapegraph SDK will be documented in this file. - -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), -and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
\ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..e8508b8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 ScrapeGraphAI + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
\ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..c4be2ee --- /dev/null +++ b/README.md @@ -0,0 +1,86 @@ +# 🌐 ScrapeGraph AI SDKs + +[![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) +[![Python SDK](https://img.shields.io/badge/Python_SDK-Latest-blue)](https://github.com/ScrapeGraphAI/scrapegraph-sdk/tree/main/scrapegraph-py) +[![JavaScript SDK](https://img.shields.io/badge/JavaScript_SDK-Latest-yellow)](https://github.com/ScrapeGraphAI/scrapegraph-sdk/tree/main/scrapegraph-js) +[![Documentation](https://img.shields.io/badge/Documentation-Latest-green)](https://scrapegraphai.com/docs) + +Official SDKs for the ScrapeGraph AI API - Intelligent web scraping powered by AI. Extract structured data from any webpage with natural language prompts. + +The credits can be bought [here](https://scrapegraphai.com)! + +## 🚀 Quick Links + +- [Python SDK Documentation](scrapegraph-py/README.md) +- [JavaScript SDK Documentation](scrapegraph-js/README.md) +- [API Documentation](https://scrapegraphai.com/docs) +- [Website](https://scrapegraphai.com) + +## 📦 Installation + +### Python +```bash +pip install scrapegraph-py +``` + +### JavaScript +```bash +npm install scrapegraph-js +``` + +## 🎯 Core Features + +- 🤖 **AI-Powered Extraction**: Use natural language to describe what data you want +- 📊 **Structured Output**: Get clean, structured data with optional schema validation +- 🔄 **Multiple Formats**: Extract data as JSON, Markdown, or custom schemas +- ⚡ **High Performance**: Concurrent processing and automatic retries +- 🔒 **Enterprise Ready**: Production-grade security and rate limiting + +## 🛠️ Available Endpoints + +### 🔍 SmartScraper +Extract structured data from any webpage using natural language prompts. + +### 📝 Markdownify +Convert any webpage into clean, formatted markdown.
+ +### 💻 LocalScraper +Extract information from a local HTML file using AI. + + +## 🌟 Key Benefits + +- 📝 **Natural Language Queries**: No complex selectors or XPath needed +- 🎯 **Precise Extraction**: AI understands context and structure +- 🔄 **Adaptive Scraping**: Works with dynamic and static content +- 📊 **Schema Validation**: Ensure data consistency with Pydantic/TypeScript +- ⚡ **Async Support**: Handle multiple requests efficiently + +## 💡 Use Cases + +- 🏢 **Business Intelligence**: Extract company information and contacts +- 📊 **Market Research**: Gather product data and pricing +- 📰 **Content Aggregation**: Convert articles to structured formats +- 🔍 **Data Mining**: Extract specific information from multiple sources +- 📱 **App Integration**: Feed clean data into your applications + +## 📖 Documentation + +For detailed documentation and examples, visit: +- [Python SDK Guide](scrapegraph-py/README.md) +- [JavaScript SDK Guide](scrapegraph-js/README.md) +- [API Documentation](https://scrapegraphai.com/docs) + +## 💬 Support & Feedback + +- 📧 Email: support@scrapegraphai.com +- 💻 GitHub Issues: [Create an issue](https://github.com/ScrapeGraphAI/scrapegraph-sdk/issues) +- 🌟 Feature Requests: [Request a feature](https://github.com/ScrapeGraphAI/scrapegraph-sdk/issues/new) + +## 📄 License + +This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. + +--- + +Made with ❤️ by [ScrapeGraph AI](https://scrapegraphai.com) diff --git a/readme.md b/readme.md deleted file mode 100644 index a44b6d9..0000000 --- a/readme.md +++ /dev/null @@ -1,137 +0,0 @@ -# ScrapeGraph SDKs - -Official SDKs for interacting with the ScrapeGraph AI API - a powerful web scraping and data extraction service.
- -## Available SDKs - -- [Python SDK (scrapegraph-py)](#python-sdk) -- [JavaScript SDK (scrapegraph-js)](#javascript-sdk) - -## Python SDK - -### Installation -bash -pip install scrapegraph-py -### Features - -- Web Scraping (basic and structured) -- Credits checking -- Feedback submission -- API status checking -- Local HTML scraping support -- Pydantic schema integration - -### Basic Usage -python -from scrapegraph_py import ScrapeGraphClient, smart_scraper -from dotenv import load_dotenv -import os -load_dotenv() -api_key = os.getenv("SCRAPEGRAPH_API_KEY") -client = ScrapeGraphClient(api_key) -url = "https://example.com" -prompt = "What does the company do?" -result = smart_scraper(client, url, prompt) -print(result) -### Structured Data with Schema -python -from pydantic import BaseModel, Field -class CompanyInfoSchema(BaseModel): -company_name: str = Field(description="The name of the company") -description: str = Field(description="A description of the company") -main_products: list[str] = Field(description="The main products of the company") -result = smart_scraper( -client=client, -url="https://example.com", -prompt="Extract company information", -schema=CompanyInfoSchema -) -bash -npm install scrapegraph-js -### Features - -- Smart web scraping -- Credits management -- Feedback submission -- Schema-based extraction -- Promise-based API - -### Basic Usage -javascript -import { smartScraper, credits, feedback } from 'scrapegraph-js'; -const apiKey = process.env.SCRAPEGRAPH_API_KEY; -const url = 'https://example.com'; -// Basic scraping -const result = await smartScraper(apiKey, url, "What does the company do?"); -console.log(JSON.parse(result)); -// Check credits -const creditsInfo = await credits(apiKey); -console.log(JSON.parse(creditsInfo)); -### Schema-based Extraction -avascript -const schema = { -title: "CompanyInfo", -properties: { -company_name: { type: "string", description: "The name of the company" }, -description: { type: "string", description: 
"A description of the company" }, -main_products: { -type: "array", -items: { type: "string" }, -description: "The main products of the company" -} -}, -required: ["company_name", "description"] -}; -const result = await smartScraper(apiKey, url, "Extract company information", schema); -## Authentication - -Both SDKs support authentication via API key. We recommend storing your API key in environment variables: -bash -For Python -export SCRAPEGRAPH_API_KEY="your-api-key-here" -For Node.js -export SCRAPEGRAPH_API_KEY="your-api-key-here" -Or using a `.env` file: -plaintext -SCRAPEGRAPH_API_KEY="your-api-key-here" - -## Error Handling - -Both SDKs provide consistent error handling -json -{ -"error": "HTTP error occurred", -"message": "Error details", -"status_code": 400 -} - -## Development - -### Python SDK Requirements -- Python 3.9+ -- [Rye](https://rye-up.com/) for dependency management (optional) - -### JavaScript SDK Requirements -- Node.js 14+ -- npm or yarn - -## Contributing - -We welcome contributions to both SDKs! Please check our [Contributing Guidelines](CONTRIBUTING.md) for more information. - -## License - -Both SDKs are licensed under the MIT License. - -## Support - -For support: -- Visit [ScrapeGraph AI Documentation](https://sgai-api.onrender.com/docs) -- Check the examples in the respective SDK's examples directory -- Contact our support team - -## Links - -- [Python SDK Documentation](https://github.com/ScrapeGraphAI/scrapegraph-sdk/tree/main/scrapegraph-py) -- [JavaScript SDK Documentation](https://github.com/ScrapeGraphAI/scrapegraph-sdk/tree/main/scrapegraph-js) -This README combines information from both SDKs and provides a comprehensive overview of their features and usage. 
I referenced the following code blocks for accuracy: diff --git a/scrapegraph-js/CODE_OF_CONDUCT.md b/scrapegraph-js/CODE_OF_CONDUCT.md index 237eaed..10e3c8a 100644 --- a/scrapegraph-js/CODE_OF_CONDUCT.md +++ b/scrapegraph-js/CODE_OF_CONDUCT.md @@ -17,23 +17,23 @@ diverse, inclusive, and healthy community. Examples of behavior that contributes to a positive environment for our community include: -* Demonstrating empathy and kindness toward other people -* Being respectful of differing opinions, viewpoints, and experiences -* Giving and gracefully accepting constructive feedback -* Accepting responsibility and apologizing to those affected by our mistakes, +- Demonstrating empathy and kindness toward other people +- Being respectful of differing opinions, viewpoints, and experiences +- Giving and gracefully accepting constructive feedback +- Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience -* Focusing on what is best not just for us as individuals, but for the +- Focusing on what is best not just for us as individuals, but for the overall community Examples of unacceptable behavior include: -* The use of sexualized language or imagery, and sexual attention or +- The use of sexualized language or imagery, and sexual attention or advances of any kind -* Trolling, insulting or derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others' private information, such as a physical or email +- Trolling, insulting or derogatory comments, and personal or political attacks +- Public or private harassment +- Publishing others' private information, such as a physical or email address, without their explicit permission -* Other conduct which could reasonably be considered inappropriate in a +- Other conduct which could reasonably be considered inappropriate in a professional setting ## Enforcement Responsibilities @@ -106,7 +106,7 @@ Violating these terms may lead to a 
permanent ban. ### 4. Permanent Ban **Community Impact**: Demonstrating a pattern of violation of community -standards, including sustained inappropriate behavior, harassment of an +standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals. **Consequence**: A permanent ban from any sort of public interaction within diff --git a/scrapegraph-js/LICENSE b/scrapegraph-js/LICENSE deleted file mode 100644 index 70e3834..0000000 --- a/scrapegraph-js/LICENSE +++ /dev/null @@ -1,7 +0,0 @@ -Copyright 2024 Scrapgraph-ai team - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the β€œSoftware”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED β€œAS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
\ No newline at end of file diff --git a/scrapegraph-js/readme.md b/scrapegraph-js/README.md similarity index 84% rename from scrapegraph-js/readme.md rename to scrapegraph-js/README.md index 654a540..9ed7150 100644 --- a/scrapegraph-js/readme.md +++ b/scrapegraph-js/README.md @@ -9,11 +9,11 @@ Official JavaScript/TypeScript SDK for the ScrapeGraph AI API - Smart web scrapi ## πŸš€ Features -- ✨ Smart web scraping with AI -- πŸ”„ Fully asynchronous design -- πŸ” Detailed error handling -- ⚑ Automatic retries and logging -- πŸ” Secure API authentication +- ✨ Smart web scraping with AI +- πŸ”„ Fully asynchronous design +- πŸ” Detailed error handling +- ⚑ Automatic retries and logging +- πŸ” Secure API authentication ## πŸ“¦ Installation @@ -23,11 +23,10 @@ Install the package using npm or yarn: # Using npm npm i scrapegraph-js -# Using yarn +# Using yarn yarn add scrapegraph-js ``` - ## πŸ”§ Quick Start > **Note**: Store your API keys securely in environment variables. Use `.env` files and libraries like `dotenv` to load them into your app. @@ -36,7 +35,6 @@ yarn add scrapegraph-js ```javascript import { smartScraper } from 'scrapegraph-js'; -import 'dotenv/config'; // Initialize variables const apiKey = process.env.SGAI_APIKEY; // Set your API key as an environment variable @@ -77,6 +75,7 @@ const prompt = 'Extract the main heading and description.'; ``` #### Scraping with Custom Output Schema + > [!NOTE] > To use this feature, it is necessary to employ the [Zod](https://www.npmjs.com/package/zod) package for schema creation. @@ -85,7 +84,6 @@ Here is a real-world example: ```javascript import { smartScraper } from 'scrapegraph-js'; import { z } from 'zod'; -import 'dotenv/config'; const apiKey = 'your-api-key'; const url = 'https://scrapegraphai.com/'; @@ -94,7 +92,7 @@ const prompt = 'What does the company do? 
and '; const schema = z.object({ title: z.string().describe('The title of the webpage'), description: z.string().describe('The description of the webpage'), - summary: z.string().describe('A brief summary of the webpage') + summary: z.string().describe('A brief summary of the webpage'), }); (async () => { @@ -107,6 +105,25 @@ const schema = z.object({ })(); ``` +### Markdownify +Converts a webpage into clean, well-structured markdown format. +```javascript +import { markdownify } from 'scrapegraph-js'; + +const apiKey = "your_api_key"; +const url = 'https://scrapegraphai.com/'; + +(async () => { + try { + const response = await markdownify(apiKey, url); + console.log(response); + } catch (error) { + console.error(error); + } +})(); +``` + + ### Checking API Credits ```javascript @@ -153,12 +170,14 @@ For detailed documentation, visit [docs.scrapegraphai.com](https://docs.scrapegr ### Setup 1. Clone the repository: + ```bash git clone https://github.com/ScrapeGraphAI/scrapegraph-sdk.git cd scrapegraph-sdk/scrapegraph-js ``` 2. Install dependencies: + ```bash npm install ``` @@ -187,22 +206,22 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change. -1. Fork the repository -2. Create your feature branch (`git checkout -b feature/AmazingFeature`) -3. Commit your changes (`git commit -m 'Add some AmazingFeature'`) -4. Push to the branch (`git push origin feature/AmazingFeature`) -5. Open a Pull Request +1. Fork the repository +2. Create your feature branch (`git checkout -b feature/AmazingFeature`) +3. Commit your changes (`git commit -m 'Add some AmazingFeature'`) +4. Push to the branch (`git push origin feature/AmazingFeature`) +5. 
Open a Pull Request ## πŸ”— Links -- [Website](https://scrapegraphai.com) -- [Documentation](https://scrapegraphai.com/documentation) -- [GitHub](https://github.com/ScrapeGraphAI/scrapegraph-sdk) +- [Website](https://scrapegraphai.com) +- [Documentation](https://scrapegraphai.com/documentation) +- [GitHub](https://github.com/ScrapeGraphAI/scrapegraph-sdk) ## πŸ’¬ Support -- πŸ“§ Email: support@scrapegraphai.com -- πŸ’» GitHub Issues: [Create an issue](https://github.com/ScrapeGraphAI/scrapegraph-sdk/issues) +- πŸ“§ Email: support@scrapegraphai.com +- πŸ’» GitHub Issues: [Create an issue](https://github.com/ScrapeGraphAI/scrapegraph-sdk/issues) - 🌟 Feature Requests: [Request a feature](https://github.com/ScrapeGraphAI/scrapegraph-sdk/issues/new) --- diff --git a/scrapegraph-js/eslint.config.js b/scrapegraph-js/eslint.config.js index 01a4fe2..a3fe107 100644 --- a/scrapegraph-js/eslint.config.js +++ b/scrapegraph-js/eslint.config.js @@ -7,5 +7,5 @@ export default [ { languageOptions: { globals: { ...globals.browser, ...globals.node } } }, pluginJs.configs.recommended, eslintPluginPrettierRecommended, - { ignorePatterns: ['node_modules/'] }, + { ignores: ['node_modules/'] }, ]; diff --git a/scrapegraph-js/examples/getCredits_example.js b/scrapegraph-js/examples/getCredits_example.js index a102028..e233b08 100644 --- a/scrapegraph-js/examples/getCredits_example.js +++ b/scrapegraph-js/examples/getCredits_example.js @@ -4,8 +4,8 @@ import 'dotenv/config'; const apiKey = process.env.SGAI_APIKEY; try { - const myCredit = await getCredits(apiKey); - console.log(myCredit) + const myCredit = await getCredits(apiKey); + console.log(myCredit); } catch (error) { - console.error(error) -} \ No newline at end of file + console.error(error); +} diff --git a/scrapegraph-js/examples/getSmartScraperRequest_example.js b/scrapegraph-js/examples/getSmartScraperRequest_example.js index 6a2d7fb..ebafaa6 100644 --- a/scrapegraph-js/examples/getSmartScraperRequest_example.js +++ 
b/scrapegraph-js/examples/getSmartScraperRequest_example.js @@ -2,11 +2,11 @@ import { getSmartScraperRequest } from 'scrapegraph-js'; import 'dotenv/config'; const apiKey = process.env.SGAI_APIKEY; -const requestId = '3fa85f64-5717-4562-b3fc-2c963f66afa6' +const requestId = '3fa85f64-5717-4562-b3fc-2c963f66afa6'; try { const requestInfo = await getSmartScraperRequest(apiKey, requestId); console.log(requestInfo); } catch (error) { console.error(error); -} \ No newline at end of file +} diff --git a/scrapegraph-js/examples/markdownify_example.js b/scrapegraph-js/examples/markdownify_example.js new file mode 100644 index 0000000..5136b8f --- /dev/null +++ b/scrapegraph-js/examples/markdownify_example.js @@ -0,0 +1,35 @@ +import { getMarkdownifyRequest, markdownify } from 'scrapegraph-js'; +import fs from 'fs'; +import 'dotenv/config'; + +// markdownify function example +const apiKey = process.env.SGAI_APIKEY; +const url = 'https://scrapegraphai.com/'; + +try { + const response = await markdownify(apiKey, url); + console.log(response); + saveFile(response.result); +} catch (error) { + console.error(error); +} + +// helper function for save the file locally +function saveFile(output) { + try { + fs.writeFileSync('result.md', output); + console.log('Success!'); + } catch (err) { + console.error('Error during the file writing: ', err); + } +} + +// getMarkdownifyRequest function example +const requestId = '2563b972-cb6f-400b-be76-edb235458560'; + +try { + const response = await getMarkdownifyRequest(apiKey, requestId); + console.log(response); +} catch (error) { + console.log(error); +} diff --git a/scrapegraph-js/examples/schema_smartScraper_example.js b/scrapegraph-js/examples/schema_smartScraper_example.js index 5024bae..bdf51df 100644 --- a/scrapegraph-js/examples/schema_smartScraper_example.js +++ b/scrapegraph-js/examples/schema_smartScraper_example.js @@ -6,11 +6,15 @@ const apiKey = process.env.SGAI_APIKEY; const url = 'https://scrapegraphai.com/'; const prompt = 
'What does the company do? and '; -const schema = 2; +const schema = z.object({ + title: z.string().describe('The title of the webpage'), + description: z.string().describe('The description of the webpage'), + summary: z.string().describe('A brief summary of the webpage'), +}); try { const response = await smartScraper(apiKey, url, prompt, schema); console.log(response.result); } catch (error) { console.error(error); -} \ No newline at end of file +} diff --git a/scrapegraph-js/examples/sendFeedback_example.js b/scrapegraph-js/examples/sendFeedback_example.js index a3f246d..27f9851 100644 --- a/scrapegraph-js/examples/sendFeedback_example.js +++ b/scrapegraph-js/examples/sendFeedback_example.js @@ -10,5 +10,5 @@ try { const feedback_response = await sendFeedback(apiKey, requestId, rating, feedbackMessage); console.log(feedback_response); } catch (error) { - console.error(error) -} \ No newline at end of file + console.error(error); +} diff --git a/scrapegraph-js/index.js b/scrapegraph-js/index.js index e1530a2..ca4dbb7 100644 --- a/scrapegraph-js/index.js +++ b/scrapegraph-js/index.js @@ -1,3 +1,4 @@ export { smartScraper, getSmartScraperRequest } from './src/smartScraper.js'; +export { markdownify, getMarkdownifyRequest } from './src/markdownify.js'; export { getCredits } from './src/credits.js'; -export { sendFeedback } from './src/feedback.js'; \ No newline at end of file +export { sendFeedback } from './src/feedback.js'; diff --git a/scrapegraph-js/package-lock.json b/scrapegraph-js/package-lock.json index 7b42d4d..c30ae03 100644 --- a/scrapegraph-js/package-lock.json +++ b/scrapegraph-js/package-lock.json @@ -1,6 +1,6 @@ { "name": "scrapegraph-js", - "version": "0.0.1", + "version": "0.0.2", "lockfileVersion": 3, "requires": true, "packages": { diff --git a/scrapegraph-js/package.json b/scrapegraph-js/package.json index dc55319..50dea8e 100644 --- a/scrapegraph-js/package.json +++ b/scrapegraph-js/package.json @@ -1,7 +1,7 @@ { "name": "scrapegraph-js", 
"author": "ScrapeGraphAI", - "version": "0.0.1", + "version": "0.0.2", "description": "Scrape and extract structured data from a webpage using ScrapeGraphAI's APIs.", "repository": { "type": "git", diff --git a/scrapegraph-js/src/credits.js b/scrapegraph-js/src/credits.js index 1c83e90..d6c5465 100644 --- a/scrapegraph-js/src/credits.js +++ b/scrapegraph-js/src/credits.js @@ -3,7 +3,7 @@ import handleError from './utils/handleError.js'; /** * Retrieve credits from the API. - * + * * @param {string} apiKey - Your ScrapeGraph AI API key * @returns {Promise} Response from the API in JSON format */ @@ -11,13 +11,13 @@ export async function getCredits(apiKey) { const endpoint = 'https://api.scrapegraphai.com/v1/credits'; const headers = { 'accept': 'application/json', - 'SGAI-APIKEY': apiKey + 'SGAI-APIKEY': apiKey, }; try { const response = await axios.get(endpoint, { headers }); return response.data; } catch (error) { - handleError(error) + handleError(error); } -} \ No newline at end of file +} diff --git a/scrapegraph-js/src/feedback.js b/scrapegraph-js/src/feedback.js index 206a87f..8f5bed6 100644 --- a/scrapegraph-js/src/feedback.js +++ b/scrapegraph-js/src/feedback.js @@ -3,7 +3,7 @@ import handleError from './utils/handleError.js'; /** * Send feedback to the API. 
- * + * * @param {string} apiKey - Your ScrapeGraph AI API key * @param {string} requestId - The request ID associated with the feedback * @param {number} rating - The rating score @@ -15,13 +15,13 @@ export async function sendFeedback(apiKey, requestId, rating, feedbackText = nul const headers = { 'accept': 'application/json', 'SGAI-APIKEY': apiKey, - 'Content-Type': 'application/json' + 'Content-Type': 'application/json', }; const feedbackData = { request_id: requestId, rating: rating, - feedback_text: feedbackText + feedback_text: feedbackText, }; try { @@ -30,4 +30,4 @@ export async function sendFeedback(apiKey, requestId, rating, feedbackText = nul } catch (error) { handleError(error); } -} \ No newline at end of file +} diff --git a/scrapegraph-js/src/markdownify.js b/scrapegraph-js/src/markdownify.js new file mode 100644 index 0000000..5a1d4e5 --- /dev/null +++ b/scrapegraph-js/src/markdownify.js @@ -0,0 +1,52 @@ +import axios from 'axios'; +import handleError from './utils/handleError.js'; + +/** + * Converts a webpage into clean, well-structured markdown format. + * + * @param {string} apiKey - Your ScrapeGraph AI API key. + * @param {string} url - The URL of the webpage to be converted. + * @returns {Promise} A promise that resolves to the markdown representation of the webpage. + * @throws {Error} Throws an error if the HTTP request fails. + */ +export async function markdownify(apiKey, url){ + const endpoint = 'https://api.scrapegraphai.com/v1/markdownify'; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + }; + + const payload = { + website_url: url, + }; + + try { + const response = await axios.post(endpoint, payload, { headers }); + return response.data; + } catch (error) { + handleError(error) + } +} + +/** + * Retrieves the status or result of a markdownify request, with the option to review results from previous requests. + * + * @param {string} apiKey - Your ScrapeGraph AI API key. 
+ * @param {string} requestId - The unique identifier for the markdownify request whose result you want to retrieve. + * @returns {Promise} A promise that resolves with details about the status or outcome of the specified request. + * @throws {Error} Throws an error if the HTTP request fails. + */ +export async function getMarkdownifyRequest(apiKey, requestId){ + const endpoint = 'https://api.scrapegraphai.com/v1/markdownify/' + requestId; + const headers = { + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + }; + + try { + const response = await axios.get(endpoint, { headers }); + return response.data; + } catch (error) { + handleError(error) + } +} \ No newline at end of file diff --git a/scrapegraph-js/src/smartScraper.js b/scrapegraph-js/src/smartScraper.js index b96a597..ce721e2 100644 --- a/scrapegraph-js/src/smartScraper.js +++ b/scrapegraph-js/src/smartScraper.js @@ -5,7 +5,7 @@ import { zodToJsonSchema } from 'zod-to-json-schema'; /** * Scrape and extract structured data from a webpage using ScrapeGraph AI. - * + * * @param {string} apiKey - Your ScrapeGraph AI API key * @param {string} url - The URL of the webpage to scrape * @param {string} prompt - Natural language prompt describing what data to extract @@ -18,12 +18,12 @@ export async function smartScraper(apiKey, url, prompt, schema = null) { const headers = { 'accept': 'application/json', 'SGAI-APIKEY': apiKey, - 'Content-Type': 'application/json' + 'Content-Type': 'application/json', }; const payload = { website_url: url, - user_prompt: prompt + user_prompt: prompt, }; if (schema) { @@ -38,13 +38,13 @@ export async function smartScraper(apiKey, url, prompt, schema = null) { const response = await axios.post(endpoint, payload, { headers }); return response.data; } catch (error) { - handleError(error) + handleError(error); } } /** * Retrieve the status or the result of a smartScraper request. It also allows you to see the result of old requests. 
- * + * * @param {string} apiKey - Your ScrapeGraph AI API key * @param {string} requestId - The request ID associated with the output of a smartScraper request. * @returns {Promise} Information related to the status or result of a scraping request. @@ -60,6 +60,6 @@ export async function getSmartScraperRequest(apiKey, requestId) { const response = await axios.get(endpoint, { headers }); return response.data; } catch (error) { - handleError(error) + handleError(error); } -} \ No newline at end of file +} diff --git a/scrapegraph-js/src/utils/handleError.js b/scrapegraph-js/src/utils/handleError.js index eec79d7..c46a4fc 100644 --- a/scrapegraph-js/src/utils/handleError.js +++ b/scrapegraph-js/src/utils/handleError.js @@ -31,10 +31,10 @@ class UnexpectedError extends Error { export default function handleError(error) { if (error.response) { - throw new HttpError(error.response.status, error.response.statusText, error.response.data.detail) + throw new HttpError(error.response.status, error.response.statusText, error.response.data.detail); } else if (error.request) { throw new NetworkError('Impossible to contact the server. 
Check your internet connection.'); } else { throw new UnexpectedError(`${error.message}`); } -} \ No newline at end of file +} diff --git a/scrapegraph-py/CHANGELOG.md b/scrapegraph-py/CHANGELOG.md index 357bd0d..26cc6f1 100644 --- a/scrapegraph-py/CHANGELOG.md +++ b/scrapegraph-py/CHANGELOG.md @@ -1,3 +1,21 @@ +## [1.8.0](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.7.0...v1.8.0) (2024-12-08) + + +### Features + +* add markdownify functionality ([239d27a](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/239d27aac28c6b132aba54bbb1fa0216cc59ce89)) + + +### Bug Fixes + +* fixed configuration for ignored files ([bc08dcb](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/bc08dcb21536a146fd941119931bc8e89e8e42c6)) +* fixed schema example ([365378a](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/365378a0c8c9125800ed6d74629d87776cf484a0)) + + +### Docs + +* improved main readme ([50fdf92](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/50fdf920e1d00e8f457138f9e68df74354696fc0)) + ## [1.7.0](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.6.0...v1.7.0) (2024-12-05)