From a2b57c7e482dfb5c7c1a125d1684e0367088c83b Mon Sep 17 00:00:00 2001 From: DPende Date: Fri, 29 Nov 2024 22:13:21 +0100 Subject: [PATCH 1/6] refactor: code refactoring --- scrapegraph-js/examples/.env.example | 2 +- scrapegraph-js/examples/getCredits_example.js | 7 +++---- .../examples/getSmartScraperRequest_example.js | 9 ++++----- scrapegraph-js/examples/sendFeedback_example.js | 12 ++++++------ scrapegraph-js/examples/smartScraper_example.js | 11 +++++------ 5 files changed, 19 insertions(+), 22 deletions(-) diff --git a/scrapegraph-js/examples/.env.example b/scrapegraph-js/examples/.env.example index 21d84ef..8d318c0 100644 --- a/scrapegraph-js/examples/.env.example +++ b/scrapegraph-js/examples/.env.example @@ -1,2 +1,2 @@ # ScrapegraphAI API Key -SGAI-APIKEY="your ScrapegraphAI API Key" \ No newline at end of file +SGAI_APIKEY="your ScrapegraphAI API Key" \ No newline at end of file diff --git a/scrapegraph-js/examples/getCredits_example.js b/scrapegraph-js/examples/getCredits_example.js index 7051b8d..a102028 100644 --- a/scrapegraph-js/examples/getCredits_example.js +++ b/scrapegraph-js/examples/getCredits_example.js @@ -1,11 +1,10 @@ -import { getCredits } from 'scrapegraph-sdk'; +import { getCredits } from 'scrapegraph-js'; import 'dotenv/config'; -try { - const apiKey = process.env.SGAI_APIKEY; +const apiKey = process.env.SGAI_APIKEY; +try { const myCredit = await getCredits(apiKey); - console.log(myCredit) } catch (error) { console.error(error) diff --git a/scrapegraph-js/examples/getSmartScraperRequest_example.js b/scrapegraph-js/examples/getSmartScraperRequest_example.js index 88cfbbc..6a2d7fb 100644 --- a/scrapegraph-js/examples/getSmartScraperRequest_example.js +++ b/scrapegraph-js/examples/getSmartScraperRequest_example.js @@ -1,12 +1,11 @@ -import { getSmartScraperRequest } from 'scrapegraph-sdk'; +import { getSmartScraperRequest } from 'scrapegraph-js'; import 'dotenv/config'; -try { - const apiKey = process.env.SGAI_APIKEY; - const requestId = 
'3fa85f64-5717-4562-b3fc-2c963f66afa6' +const apiKey = process.env.SGAI_APIKEY; +const requestId = '3fa85f64-5717-4562-b3fc-2c963f66afa6' +try { const requestInfo = await getSmartScraperRequest(apiKey, requestId); - console.log(requestInfo); } catch (error) { console.error(error); diff --git a/scrapegraph-js/examples/sendFeedback_example.js b/scrapegraph-js/examples/sendFeedback_example.js index 423e5c3..a3f246d 100644 --- a/scrapegraph-js/examples/sendFeedback_example.js +++ b/scrapegraph-js/examples/sendFeedback_example.js @@ -1,12 +1,12 @@ -import { sendFeedback } from 'scrapegraph-sdk'; +import { sendFeedback } from 'scrapegraph-js'; import 'dotenv/config'; -try { - const apiKey = process.env.SGAI_APIKEY; - const requestId = '16a63a80-c87f-4cde-b005-e6c3ecda278b'; - const rating = 5; - const feedbackMessage = 'This is a test feedback message.'; +const apiKey = process.env.SGAI_APIKEY; +const requestId = '16a63a80-c87f-4cde-b005-e6c3ecda278b'; +const rating = 5; +const feedbackMessage = 'This is a test feedback message.'; +try { const feedback_response = await sendFeedback(apiKey, requestId, rating, feedbackMessage); console.log(feedback_response); } catch (error) { diff --git a/scrapegraph-js/examples/smartScraper_example.js b/scrapegraph-js/examples/smartScraper_example.js index 96d61b3..38e5613 100644 --- a/scrapegraph-js/examples/smartScraper_example.js +++ b/scrapegraph-js/examples/smartScraper_example.js @@ -1,13 +1,12 @@ -import { smartScraper } from 'scrapegraph-sdk'; +import { smartScraper } from 'scrapegraph-js'; import 'dotenv/config'; -try { - const apiKey = process.env.SGAI_APIKEY; - const url = 'https://scrapegraphai.com'; - const prompt = 'What does the company do?'; +const apiKey = process.env.SGAI_APIKEY; +const url = 'https://scrapegraphai.com'; +const prompt = 'What does the company do?'; +try { const response = await smartScraper(apiKey, url, prompt); - console.log(response); } catch (error) { console.error(error); From 
129917377b6a685d769a480b717bf980d3199833 Mon Sep 17 00:00:00 2001 From: DPende Date: Fri, 29 Nov 2024 22:17:18 +0100 Subject: [PATCH 2/6] fix: the "workspace" key has been removed because it was conflicting with the package.json file in the scrapegraph-js folder. --- package.json | 3 --- 1 file changed, 3 deletions(-) diff --git a/package.json b/package.json index b902016..a02e34d 100644 --- a/package.json +++ b/package.json @@ -9,9 +9,6 @@ }, "author": "ScrapeGraphAI", "license": "MIT", - "workspaces": [ - "scrapegraph-js" - ], "scripts": { "semantic-release": "semantic-release" }, From ee5738bd737cd07a553d148403a4bbb5e80e5be3 Mon Sep 17 00:00:00 2001 From: DPende Date: Fri, 29 Nov 2024 22:18:41 +0100 Subject: [PATCH 3/6] chore: added Zod package dependency --- scrapegraph-js/package-lock.json | 26 +++++++++++++++++++++++--- scrapegraph-js/package.json | 3 ++- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/scrapegraph-js/package-lock.json b/scrapegraph-js/package-lock.json index cb84fb4..ba7917b 100644 --- a/scrapegraph-js/package-lock.json +++ b/scrapegraph-js/package-lock.json @@ -1,15 +1,17 @@ { - "name": "scrapegraph-sdk", + "name": "scrapegraph-js", "version": "0.0.1", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "scrapegraph-sdk", + "name": "scrapegraph-js", "version": "0.0.1", "license": "MIT", "dependencies": { - "axios": "^1.6.0" + "axios": "^1.6.0", + "zod": "^3.23.8", + "zod-to-json-schema": "^3.23.5" }, "devDependencies": { "dotenv": "^16.4.5" @@ -126,6 +128,24 @@ "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==", "license": "MIT" + }, + "node_modules/zod": { + "version": "3.23.8", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.23.8.tgz", + "integrity": "sha512-XBx9AXhXktjUqnepgTiE5flcKIYWi/rme0Eaj+5Y0lftuGBq+jyRu/md4WnuxqgP1ubdpNCsYEYPxrzVHD8d6g==", + "license": 
"MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, + "node_modules/zod-to-json-schema": { + "version": "3.23.5", + "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.23.5.tgz", + "integrity": "sha512-5wlSS0bXfF/BrL4jPAbz9da5hDlDptdEppYfe+x4eIJ7jioqKG9uUxOwPzqof09u/XeVdrgFu29lZi+8XNDJtA==", + "license": "ISC", + "peerDependencies": { + "zod": "^3.23.3" + } } } } diff --git a/scrapegraph-js/package.json b/scrapegraph-js/package.json index e2d1ffb..46f1dd9 100644 --- a/scrapegraph-js/package.json +++ b/scrapegraph-js/package.json @@ -24,7 +24,8 @@ "type": "module", "dependencies": { "axios": "^1.6.0", - "zod": "^3.23.8" + "zod": "^3.23.8", + "zod-to-json-schema": "^3.23.5" }, "devDependencies": { "dotenv": "^16.4.5" From cf2f28fa029df0acb7058fde8239046d77ef0a8a Mon Sep 17 00:00:00 2001 From: DPende Date: Fri, 29 Nov 2024 22:20:19 +0100 Subject: [PATCH 4/6] docs: added an example of the smartScraper functionality using a schema --- scrapegraph-js/readme.md | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/scrapegraph-js/readme.md b/scrapegraph-js/readme.md index a69e313..654a540 100644 --- a/scrapegraph-js/readme.md +++ b/scrapegraph-js/readme.md @@ -77,9 +77,34 @@ const prompt = 'Extract the main heading and description.'; ``` #### Scraping with Custom Output Schema +> [!NOTE] +> To use this feature, it is necessary to employ the [Zod](https://www.npmjs.com/package/zod) package for schema creation. + +Here is a real-world example: ```javascript -//TODO +import { smartScraper } from 'scrapegraph-js'; +import { z } from 'zod'; +import 'dotenv/config'; + +const apiKey = 'your-api-key'; +const url = 'https://scrapegraphai.com/'; +const prompt = 'What does the company do? 
and '; + +const schema = z.object({ + title: z.string().describe('The title of the webpage'), + description: z.string().describe('The description of the webpage'), + summary: z.string().describe('A brief summary of the webpage') +}); + +(async () => { + try { + const response = await smartScraper(apiKey, url, prompt, schema); + console.log(response.result); + } catch (error) { + console.error('Error:', error); + } +})(); ``` ### Checking API Credits From 10a1a5a477a6659aabf3afebfffdbefc14d12d3e Mon Sep 17 00:00:00 2001 From: DPende Date: Fri, 29 Nov 2024 22:21:43 +0100 Subject: [PATCH 5/6] feat: implemented support for requests with schema --- scrapegraph-js/src/smartScraper.js | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/scrapegraph-js/src/smartScraper.js b/scrapegraph-js/src/smartScraper.js index 5f8d3f5..b96a597 100644 --- a/scrapegraph-js/src/smartScraper.js +++ b/scrapegraph-js/src/smartScraper.js @@ -1,5 +1,7 @@ import axios from 'axios'; -import handleError from './utils/handleError.js' +import handleError from './utils/handleError.js'; +import { ZodType } from 'zod'; +import { zodToJsonSchema } from 'zod-to-json-schema'; /** * Scrape and extract structured data from a webpage using ScrapeGraph AI. 
@@ -25,12 +27,11 @@ export async function smartScraper(apiKey, url, prompt, schema = null) { }; if (schema) { - payload.output_schema = { - description: schema.title || 'Schema', - name: schema.title || 'Schema', - properties: schema.properties || {}, - required: schema.required || [] - }; + if (schema instanceof ZodType) { + payload.output_schema = zodToJsonSchema(schema); + } else { + throw new Error('The schema must be an instance of a valid Zod schema'); + } } try { From baf933b0826b63d4ecf61c8593676357619a1c73 Mon Sep 17 00:00:00 2001 From: DPende Date: Fri, 29 Nov 2024 22:23:06 +0100 Subject: [PATCH 6/6] feat: added example of the smartScraper function using a schema --- .../examples/schema_smartScraper_example.js | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 scrapegraph-js/examples/schema_smartScraper_example.js diff --git a/scrapegraph-js/examples/schema_smartScraper_example.js b/scrapegraph-js/examples/schema_smartScraper_example.js new file mode 100644 index 0000000..5024bae --- /dev/null +++ b/scrapegraph-js/examples/schema_smartScraper_example.js @@ -0,0 +1,21 @@ +import { smartScraper } from 'scrapegraph-js'; +import { z } from 'zod'; +import 'dotenv/config'; + +const apiKey = process.env.SGAI_APIKEY; +const url = 'https://scrapegraphai.com/'; +const prompt = 'What does the company do?'; + +// smartScraper only accepts a Zod schema; any other value makes it throw. +const schema = z.object({ + title: z.string().describe('The title of the webpage'), + description: z.string().describe('The description of the webpage'), + summary: z.string().describe('A brief summary of the webpage') +}); + +try { + const response = await smartScraper(apiKey, url, prompt, schema); + console.log(response.result); +} catch (error) { + console.error(error); +} \ No newline at end of file