diff --git a/scrapegraph-js/examples/schema_searchScraper_example.js b/scrapegraph-js/examples/schema_searchScraper_example.js
index 9ef087a..c51c414 100644
--- a/scrapegraph-js/examples/schema_searchScraper_example.js
+++ b/scrapegraph-js/examples/schema_searchScraper_example.js
@@ -1,3 +1,12 @@
+/**
+ * Schema-based SearchScraper Example
+ *
+ * This example demonstrates both schema-based output and configurable website limits:
+ * - Default: 3 websites (30 credits)
+ * - Enhanced: 5 websites (50 credits) - provides more comprehensive data for schema
+ * - Maximum: 20 websites (200 credits) - for highly detailed schema population
+ */
+
 import { searchScraper } from 'scrapegraph-js';
 import { z } from 'zod';
 import 'dotenv/config';
@@ -11,9 +20,25 @@ const schema = z.object({
   major_features: z.array(z.string()),
 });

+// Configure number of websites for better schema population
+const numResults = 5; // Enhanced search for better schema data (50 credits)
+
 try {
-  const response = await searchScraper(apiKey, prompt, schema);
-  console.log(response.result);
+  console.log(`šŸ” Searching ${numResults} websites with custom schema`);
+  console.log(`šŸ’³ Credits required: ${numResults <= 3 ? 30 : 30 + (numResults - 3) * 10}`);
+  console.log('-'.repeat(60));
+
+  const response = await searchScraper(apiKey, prompt, numResults, schema);
+
+  console.log('āœ… Schema-based search completed successfully!');
+  console.log('\nšŸ“‹ STRUCTURED RESULT:');
+  console.log(JSON.stringify(response.result, null, 2));
+
+  console.log('\nšŸ”— Reference URLs:');
+  response.reference_urls?.forEach((url, index) => {
+    console.log(`${index + 1}. ${url}`);
+  });
+
 } catch (error) {
-  console.error(error);
+  console.error('āŒ Error:', error.message);
 }
diff --git a/scrapegraph-js/examples/searchScraper_enhanced_example.js b/scrapegraph-js/examples/searchScraper_enhanced_example.js
new file mode 100644
index 0000000..0535c55
--- /dev/null
+++ b/scrapegraph-js/examples/searchScraper_enhanced_example.js
@@ -0,0 +1,333 @@
+/**
+ * Enhanced SearchScraper Example
+ *
+ * This example demonstrates the SearchScraper API with configurable website limits.
+ * The Issue #144 enhancement allows users to search up to 20 websites (increased from the previous limit of 3)
+ * with a dynamic credit pricing system.
+ *
+ * Key Features:
+ * - Configurable website limits (3-20 websites)
+ * - Dynamic credit pricing: 30 credits base + 10 credits per additional website
+ * - Enhanced research depth and accuracy
+ * - Backward compatibility with existing applications
+ *
+ * Cost Structure:
+ * - Base cost: 30 credits for 3 websites (default)
+ * - Additional websites: 10 credits each (e.g., 5 websites = 30 + 2*10 = 50 credits)
+ * - Maximum websites: 20 (total cost: 30 + 17*10 = 200 credits)
+ *
+ * Requirements:
+ * - Node.js
+ * - scrapegraph-js package
+ * - dotenv package
+ * - A .env file with your SGAI_APIKEY
+ *
+ * Example .env file:
+ * SGAI_APIKEY=your_api_key_here
+ */
+
+import { searchScraper } from 'scrapegraph-js';
+import 'dotenv/config';
+
+/**
+ * Calculate the required credits for a SearchScraper request.
+ * @param {number} numWebsites - Number of websites to scrape (3-20)
+ * @returns {number} Total credits required
+ */
+function calculateCredits(numWebsites) {
+  // Validate website count
+  const validatedCount = Math.max(3, Math.min(20, numWebsites));
+
+  // Calculate credits: 30 base + 10 per extra website
+  if (validatedCount <= 3) {
+    return 30;
+  } else {
+    const extraWebsites = validatedCount - 3;
+    return 30 + (extraWebsites * 10);
+  }
+}
+
+/**
+ * Query the Enhanced SearchScraper API for search results.
+ * @param {string} userPrompt - The search prompt string
+ * @param {number} numResults - Number of websites to scrape (3-20). Default is 3.
+ * @returns {Promise} The search results with metadata
+ */
+async function searchScraperQuery(userPrompt, numResults = 3) {
+  const apiKey = process.env.SGAI_APIKEY;
+
+  if (!apiKey) {
+    throw new Error('SGAI_APIKEY not found in environment variables. Please create a .env file with: SGAI_APIKEY=your_api_key_here');
+  }
+
+  // Validate and calculate credits
+  const validatedWebsites = Math.max(3, Math.min(20, numResults));
+  const requiredCredits = calculateCredits(validatedWebsites);
+
+  console.log(`šŸ” Search Prompt: ${userPrompt}`);
+  console.log(`🌐 Requested websites: ${numResults} → Validated: ${validatedWebsites}`);
+  console.log(`šŸ’³ Required credits: ${requiredCredits}`);
+  console.log('-'.repeat(60));
+
+  const startTime = Date.now();
+
+  try {
+    const response = await searchScraper(apiKey, userPrompt, validatedWebsites);
+    const executionTime = (Date.now() - startTime) / 1000;
+
+    console.log(`ā±ļø Execution time: ${executionTime.toFixed(2)} seconds`);
+
+    // Extract result data
+    const resultData = {
+      result: response.result || '',
+      references: response.reference_urls || [],
+      metadata: {
+        request_id: response.request_id,
+        num_results: validatedWebsites,
+        execution_time: executionTime,
+        required_credits: requiredCredits,
+      },
+    };
+
+    console.log(`āœ… Found ${resultData.references.length} reference sources`);
+    console.log(`šŸ“Š Credits used: ${requiredCredits}`);
+
+    return resultData;
+
+  } catch (error) {
+    const executionTime = (Date.now() - startTime) / 1000;
+    console.log(`ā±ļø Execution time: ${executionTime.toFixed(2)} seconds`);
+    console.log(`āŒ Error: ${error.message}`);
+    throw error;
+  }
+}
+
+/**
+ * Demonstrate the benefits of different website scaling options.
+ */
+function demonstrateScalingBenefits() {
+  console.log('šŸ’° SEARCHSCRAPER CREDIT SCALING');
+  console.log('='.repeat(50));
+
+  const scalingExamples = [
+    [3, 'Standard Search (Default)'],
+    [5, 'Enhanced Search (More Sources)'],
+    [10, 'Comprehensive Search (Deep Research)'],
+    [15, 'Extensive Search (Maximum Coverage)'],
+    [20, 'Ultimate Search (Complete Coverage)'],
+  ];
+
+  scalingExamples.forEach(([websites, description]) => {
+    const credits = calculateCredits(websites);
+    const extraWebsites = Math.max(0, websites - 3);
+    const efficiency = websites / credits;
+
+    console.log(`🌐 ${websites.toString().padStart(2)} websites (${description})`);
+    console.log(`   šŸ’³ ${credits.toString().padStart(3)} credits (base: 30 + ${extraWebsites} Ɨ 10)`);
+    console.log(`   šŸ“Š Efficiency: ${efficiency.toFixed(3)} websites/credit`);
+    console.log();
+  });
+}
+
+/**
+ * Run the same query with different website limits to show the benefit.
+ */
+async function runComparisonExample() {
+  const query = 'Latest advancements in artificial intelligence 2024';
+
+  console.log('šŸ”¬ COMPARISON: STANDARD vs ENHANCED SEARCH');
+  console.log('='.repeat(60));
+  console.log(`Query: ${query}`);
+  console.log();
+
+  // Test different configurations
+  const configurations = [
+    { websites: 3, description: 'Standard Search' },
+    { websites: 7, description: 'Enhanced Search' },
+  ];
+
+  const results = {};
+
+  for (const config of configurations) {
+    const { websites, description } = config;
+
+    console.log(`šŸš€ Running ${description} (${websites} websites)...`);
+    try {
+      const result = await searchScraperQuery(query, websites);
+      results[websites] = result;
+      console.log(`āœ… ${description} completed successfully`);
+      console.log(`   šŸ“„ Result length: ${result.result.length} characters`);
+      console.log(`   šŸ”— References: ${result.references.length} sources`);
+      console.log();
+    } catch (error) {
+      console.log(`āŒ ${description} failed: ${error.message}`);
+      console.log();
+    }
+  }
+
+  // Show comparison summary
+  const resultKeys = Object.keys(results);
+  if (resultKeys.length > 1) {
+    console.log('šŸ“Š COMPARISON SUMMARY');
+    console.log('-'.repeat(40));
+    resultKeys.forEach(websites => {
+      const result = results[websites];
+      const metadata = result.metadata;
+      console.log(
+        `🌐 ${websites} websites: ${result.references.length} sources, ` +
+        `${metadata.required_credits} credits, ` +
+        `${metadata.execution_time.toFixed(1)}s`
+      );
+    });
+  }
+}
+
+/**
+ * Run concurrent searches to demonstrate parallel processing
+ */
+async function runConcurrentExample() {
+  console.log('šŸš€ CONCURRENT REQUESTS EXAMPLE');
+  console.log('='.repeat(50));
+
+  // Define multiple queries with different website limits
+  const queries = [
+    ['JavaScript best practices 2024', 3],
+    ['React vs Vue comparison', 5],
+    ['Node.js performance optimization', 4],
+  ];
+
+  console.log('šŸ”„ Running concurrent searches...');
+  const startTime = Date.now();
+
+  try {
+    // Create promises for concurrent execution
+    const promises = queries.map(([query, numResults]) =>
+      searchScraperQuery(query, numResults)
+    );
+
+    // Wait for all requests to complete
+    const results = await Promise.allSettled(promises);
+    const totalTime = (Date.now() - startTime) / 1000;
+
+    console.log(`ā±ļø Total concurrent execution time: ${totalTime.toFixed(2)} seconds`);
+    console.log();
+
+    const successfulResults = results.filter(r => r.status === 'fulfilled').map(r => r.value);
+    const failedResults = results.filter(r => r.status === 'rejected');
+
+    console.log(`āœ… Successful requests: ${successfulResults.length}`);
+    console.log(`āŒ Failed requests: ${failedResults.length}`);
+
+    if (successfulResults.length > 0) {
+      const totalCredits = successfulResults.reduce((sum, r) => sum + r.metadata.required_credits, 0);
+      const totalSources = successfulResults.reduce((sum, r) => sum + r.references.length, 0);
+      console.log(`šŸ’³ Total credits used: ${totalCredits}`);
+      console.log(`šŸ”— Total sources gathered: ${totalSources}`);
+    }
+
+    if (failedResults.length > 0) {
+      console.log('\nāŒ Failed requests:');
+      failedResults.forEach((result, index) => {
+        console.log(`  ${index + 1}. ${result.reason.message}`);
+      });
+    }
+
+  } catch (error) {
+    console.log(`āŒ Concurrent execution failed: ${error.message}`);
+  }
+
+  console.log();
+}
+
+/**
+ * Main function demonstrating enhanced SearchScraper features.
+ */
+async function main() {
+  console.log('šŸš€ ENHANCED SEARCHSCRAPER DEMONSTRATION');
+  console.log('šŸ”— Issue #144: SearchScraper Website Limit Enhancement');
+  console.log('='.repeat(70));
+  console.log();
+
+  // Check API key
+  const apiKey = process.env.SGAI_APIKEY;
+  if (!apiKey) {
+    console.log('āŒ Error: SGAI_APIKEY not found in .env file');
+    console.log('Please create a .env file with your API key:');
+    console.log('SGAI_APIKEY=your_api_key_here');
+    console.log();
+    console.log('šŸ“– Showing credit scaling demonstration without API calls...');
+    console.log();
+    demonstrateScalingBenefits();
+    return;
+  }
+
+  try {
+    // 1. Show credit scaling
+    demonstrateScalingBenefits();
+
+    // 2. Run basic example
+    console.log('šŸŽÆ BASIC EXAMPLE');
+    console.log('='.repeat(30));
+
+    const userPrompt = 'What are the latest trends in machine learning?';
+    const numResults = 5; // Enhanced search with 5 websites
+
+    try {
+      const results = await searchScraperQuery(userPrompt, numResults);
+
+      console.log();
+      console.log('šŸ“‹ RESULTS SUMMARY:');
+      console.log(`  šŸ” Query: ${userPrompt}`);
+      console.log(`  🌐 Websites scraped: ${results.metadata.num_results}`);
+      console.log(`  šŸ’³ Credits used: ${results.metadata.required_credits}`);
+      console.log(`  ā±ļø Execution time: ${results.metadata.execution_time.toFixed(1)}s`);
+      console.log(`  šŸ”— Reference sources: ${results.references.length}`);
+      console.log();
+
+      // Show a portion of the result
+      const resultText = results.result;
+      if (resultText.length > 300) {
+        console.log(`šŸ“„ Result preview: ${resultText.substring(0, 300)}...`);
+      } else {
+        console.log(`šŸ“„ Result: ${resultText}`);
+      }
+      console.log();
+
+      // Show references
+      console.log('šŸ”— REFERENCE SOURCES:');
+      results.references.slice(0, 5).forEach((ref, i) => {
+        console.log(`  ${i + 1}. ${ref}`);
+      });
+      if (results.references.length > 5) {
+        console.log(`  ... and ${results.references.length - 5} more sources`);
+      }
+      console.log();
+
+    } catch (error) {
+      console.log(`āŒ Error: ${error.message}`);
+      console.log();
+    }
+
+    // 3. Run comparison example
+    await runComparisonExample();
+
+    // 4. Run concurrent example
+    await runConcurrentExample();
+
+    console.log('✨ Enhanced SearchScraper demonstration completed!');
+    console.log();
+    console.log('šŸŽÆ Key Enhancement Benefits:');
+    console.log('   • Configurable website limits (3-20)');
+    console.log('   • Transparent credit pricing');
+    console.log('   • Better research depth and accuracy');
+    console.log('   • Maintained backward compatibility');
+    console.log('   • Enhanced data validation through multiple sources');
+    console.log('   • Concurrent request support for better performance');
+
+  } catch (error) {
+    console.log(`āŒ Unexpected error: ${error.message}`);
+  }
+}
+
+// Run the demonstration
+main().catch(console.error);
\ No newline at end of file
diff --git a/scrapegraph-js/examples/searchScraper_example.js b/scrapegraph-js/examples/searchScraper_example.js
index 0bb1df7..3bf7bce 100644
--- a/scrapegraph-js/examples/searchScraper_example.js
+++ b/scrapegraph-js/examples/searchScraper_example.js
@@ -1,12 +1,38 @@
+/**
+ * Basic SearchScraper Example
+ *
+ * This example demonstrates the configurable website limits feature:
+ * - Default: 3 websites (30 credits)
+ * - Enhanced: 5 websites (50 credits) - uncomment to try
+ * - Maximum: 20 websites (200 credits) - for comprehensive research
+ */
+
 import { searchScraper } from 'scrapegraph-js';
 import 'dotenv/config';

 const apiKey = process.env.SGAI_APIKEY;
 const prompt = 'What is the latest version of Python and what are its main features?';

+// Configure the number of websites to search
+const numResults = 3; // Default: 3 websites (30 credits)
+// const numResults = 5; // Enhanced: 5 websites (50 credits) - uncomment for more comprehensive results
+// const numResults = 10; // Deep research: 10 websites (100 credits) - uncomment for extensive research
+
 try {
-  const response = await searchScraper(apiKey, prompt);
-  console.log(response);
+  console.log(`šŸ” Searching ${numResults} websites for: ${prompt}`);
+  console.log(`šŸ’³ Credits required: ${numResults <= 3 ? 30 : 30 + (numResults - 3) * 10}`);
+  console.log('-'.repeat(60));
+
+  const response = await searchScraper(apiKey, prompt, numResults);
+
+  console.log('āœ… Search completed successfully!');
+  console.log('\nšŸ“‹ RESULTS:');
+  console.log(`Result: ${response.result}`);
+  console.log('\nšŸ”— Reference URLs:');
+  response.reference_urls?.forEach((url, index) => {
+    console.log(`${index + 1}. ${url}`);
+  });
+
 } catch (error) {
-  console.error(error);
+  console.error('āŒ Error:', error.message);
 }
diff --git a/scrapegraph-js/src/searchScraper.js b/scrapegraph-js/src/searchScraper.js
index e632d9e..8aa75e1 100644
--- a/scrapegraph-js/src/searchScraper.js
+++ b/scrapegraph-js/src/searchScraper.js
@@ -8,12 +8,15 @@ import { zodToJsonSchema } from 'zod-to-json-schema';
  *
  * @param {string} apiKey - Your ScrapeGraph AI API key
  * @param {string} prompt - Natural language prompt describing what data to extract
+ * @param {number} [numResults=3] - Number of websites to scrape (3-20). Default is 3.
+ *                                  More websites provide better research depth but cost more credits.
+ *                                  Credit calculation: 30 base + 10 per additional website beyond 3.
  * @param {Object} [schema] - Optional schema object defining the output structure
  * @param {String} userAgent - the user agent like "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
  * @returns {Promise} Extracted data in JSON format matching the provided schema
  * @throws - Will throw an error in case of an HTTP failure.
  */
-export async function searchScraper(apiKey, prompt, schema = null, userAgent = null) {
+export async function searchScraper(apiKey, prompt, numResults = 3, schema = null, userAgent = null) {
   const endpoint = 'https://api.scrapegraphai.com/v1/searchscraper';
   const headers = {
     'accept': 'application/json',
@@ -23,8 +26,14 @@ export async function searchScraper(apiKey, prompt, schema = null, userAgent = n

   if (userAgent) headers['User-Agent'] = userAgent;

+  // Validate numResults
+  if (numResults < 3 || numResults > 20) {
+    throw new Error('numResults must be between 3 and 20');
+  }
+
   const payload = {
     user_prompt: prompt,
+    num_results: numResults,
   };

   if (schema) {
diff --git a/scrapegraph-py/examples/async/async_searchscraper_example.py b/scrapegraph-py/examples/async/async_searchscraper_example.py
index 1aae8f9..4b7f016 100644
--- a/scrapegraph-py/examples/async/async_searchscraper_example.py
+++ b/scrapegraph-py/examples/async/async_searchscraper_example.py
@@ -1,5 +1,10 @@
 """
 Example of using the async searchscraper functionality to search for information concurrently.
+
+This example demonstrates the configurable website limits feature:
+- Default: 3 websites (30 credits)
+- Enhanced: 5 websites (50 credits) - for better research depth
+- Maximum: 20 websites (200 credits) - for comprehensive research
 """

 import asyncio
@@ -14,15 +19,18 @@ async def main():
     # Initialize async client
     sgai_client = AsyncClient(api_key="your-api-key-here")

-    # List of search queries
+    # List of search queries with different website limits for demonstration
     queries = [
-        "What is the latest version of Python and what are its main features?",
-        "What are the key differences between Python 2 and Python 3?",
-        "What is Python's GIL and how does it work?",
+        ("What is the latest version of Python and what are its main features?", 3),
+        ("What are the key differences between Python 2 and Python 3?", 5),
+        ("What is Python's GIL and how does it work?", 3),
     ]

-    # Create tasks for concurrent execution
-    tasks = [sgai_client.searchscraper(user_prompt=query) for query in queries]
+    # Create tasks for concurrent execution with configurable website limits
+    tasks = [
+        sgai_client.searchscraper(user_prompt=query, num_results=num_results)
+        for query, num_results in queries
+    ]

     # Execute requests concurrently
     responses = await asyncio.gather(*tasks, return_exceptions=True)
@@ -32,8 +40,10 @@ async def main():
         if isinstance(response, Exception):
             print(f"\nError for query {i+1}: {response}")
         else:
+            query, num_results = queries[i]
             print(f"\nSearch {i+1}:")
-            print(f"Query: {queries[i]}")
+            print(f"Query: {query}")
+            print(f"Websites searched: {num_results} (Credits: {30 if num_results <= 3 else 30 + (num_results - 3) * 10})")
             print(f"Result: {response['result']}")
             print("Reference URLs:")
             for url in response["reference_urls"]:
diff --git a/scrapegraph-py/examples/async/async_searchscraper_schema_example.py b/scrapegraph-py/examples/async/async_searchscraper_schema_example.py
index 753a3e0..5caebdb 100644
--- a/scrapegraph-py/examples/async/async_searchscraper_schema_example.py
+++ b/scrapegraph-py/examples/async/async_searchscraper_schema_example.py
@@ -1,5 +1,10 @@
 """
 Example of using the async searchscraper functionality with output schemas for extraction.
+
+This example demonstrates both schema-based output and configurable website limits:
+- Using different website limits for different complexity levels
+- Enhanced searches provide better data for complex schema population
+- Concurrent processing of multiple schema-based searches
 """

 import asyncio
@@ -37,26 +42,36 @@ async def main():
     # Initialize async client
     sgai_client = AsyncClient(api_key="your-api-key-here")

-    # Define search queries with their corresponding schemas
+    # Define search queries with their corresponding schemas and website limits
     searches = [
         {
             "prompt": "What is the latest version of Python? Include the release date and main features.",
             "schema": PythonVersionInfo,
+            "num_results": 4,  # Moderate search for version info (40 credits)
         },
         {
             "prompt": "Compare Python 2 and Python 3, including backward compatibility and migration difficulty.",
             "schema": PythonComparison,
+            "num_results": 6,  # Enhanced search for comparison (60 credits)
         },
         {
             "prompt": "Explain Python's GIL, its purpose, limitations, and possible workarounds.",
             "schema": GILInfo,
+            "num_results": 8,  # Deep search for technical details (80 credits)
         },
     ]

-    # Create tasks for concurrent execution
+    print("šŸš€ Starting concurrent schema-based searches with configurable limits:")
+    for i, search in enumerate(searches, 1):
+        credits = 30 if search["num_results"] <= 3 else 30 + (search["num_results"] - 3) * 10
+        print(f"  {i}. {search['num_results']} websites ({credits} credits): {search['prompt'][:50]}...")
+    print()
+
+    # Create tasks for concurrent execution with configurable website limits
     tasks = [
         sgai_client.searchscraper(
             user_prompt=search["prompt"],
+            num_results=search["num_results"],
             output_schema=search["schema"],
         )
         for search in searches
diff --git a/scrapegraph-py/examples/sync/searchscraper_example.py b/scrapegraph-py/examples/sync/searchscraper_example.py
index 2b1903d..21bdb5f 100644
--- a/scrapegraph-py/examples/sync/searchscraper_example.py
+++ b/scrapegraph-py/examples/sync/searchscraper_example.py
@@ -1,18 +1,41 @@
 """
 Example of using the searchscraper functionality to search for information.
+
+This example demonstrates the configurable website limits feature:
+- Default: 3 websites (30 credits)
+- Enhanced: 5 websites (50 credits) - uncomment to try
+- Maximum: 20 websites (200 credits) - for comprehensive research
+
+Requirements:
+- A .env file with your SGAI_API_KEY
+
+Example .env file:
+SGAI_API_KEY=your_api_key_here
 """

+import os
+from dotenv import load_dotenv
 from scrapegraph_py import Client
 from scrapegraph_py.logger import sgai_logger

+# Load environment variables from .env file
+load_dotenv()
+
 sgai_logger.set_logging(level="INFO")

-# Initialize the client
-client = Client(api_key="your-api-key-here")
+# Initialize the client with API key from environment
+api_key = os.getenv("SGAI_API_KEY")
+if not api_key:
+    raise ValueError("SGAI_API_KEY not found in environment variables. Please create a .env file with: SGAI_API_KEY=your_api_key_here")
+
+client = Client(api_key=api_key)

-# Send a searchscraper request
+# Send a searchscraper request with configurable website limits
 response = client.searchscraper(
-    user_prompt="What is the latest version of Python and what are its main features?"
+    user_prompt="What is the latest version of Python and what are its main features?",
+    num_results=3  # Default: 3 websites (30 credits)
+    # num_results=5  # Enhanced: 5 websites (50 credits) - uncomment for more comprehensive results
+    # num_results=10  # Deep research: 10 websites (100 credits) - uncomment for extensive research
 )

 # Print the results
diff --git a/scrapegraph-py/examples/sync/searchscraper_schema_example.py b/scrapegraph-py/examples/sync/searchscraper_schema_example.py
index 8c678b2..fbc5422 100644
--- a/scrapegraph-py/examples/sync/searchscraper_schema_example.py
+++ b/scrapegraph-py/examples/sync/searchscraper_schema_example.py
@@ -1,5 +1,10 @@
 """
 Example of using the searchscraper functionality with a custom output schema.
+
+This example demonstrates both schema-based output and configurable website limits:
+- Default: 3 websites (30 credits)
+- Enhanced: 5 websites (50 credits) - provides more comprehensive data for schema
+- Maximum: 20 websites (200 credits) - for highly detailed schema population
 """

 from typing import List
@@ -23,9 +28,14 @@ class PythonVersionInfo(BaseModel):
 # Initialize the client
 client = Client(api_key="your-api-key-here")

-# Send a searchscraper request with schema
+# Send a searchscraper request with schema and configurable website limits
+num_results = 5  # Enhanced search for better schema data (50 credits)
+print(f"šŸ” Searching {num_results} websites with custom schema")
+print(f"šŸ’³ Credits required: {30 if num_results <= 3 else 30 + (num_results - 3) * 10}")
+
 response = client.searchscraper(
     user_prompt="What is the latest version of Python? Include the release date and main features.",
+    num_results=num_results,  # More websites for better schema population
     output_schema=PythonVersionInfo,
 )

diff --git a/scrapegraph-py/scrapegraph_py/async_client.py b/scrapegraph-py/scrapegraph_py/async_client.py
index a852c18..3c297f9 100644
--- a/scrapegraph-py/scrapegraph_py/async_client.py
+++ b/scrapegraph-py/scrapegraph_py/async_client.py
@@ -254,17 +254,29 @@ async def get_credits(self):
     async def searchscraper(
         self,
         user_prompt: str,
+        num_results: Optional[int] = 3,
         headers: Optional[dict[str, str]] = None,
         output_schema: Optional[BaseModel] = None,
     ):
-        """Send a searchscraper request"""
+        """Send a searchscraper request
+
+        Args:
+            user_prompt: The search prompt string
+            num_results: Number of websites to scrape (3-20). Default is 3.
+                More websites provide better research depth but cost more credits.
+                Credit calculation: 30 base + 10 per additional website beyond 3.
+            headers: Optional headers to send with the request
+            output_schema: Optional schema to structure the output
+        """
         logger.info("šŸ” Starting searchscraper request")
         logger.debug(f"šŸ“ Prompt: {user_prompt}")
+        logger.debug(f"🌐 Number of results: {num_results}")
         if headers:
             logger.debug("šŸ”§ Using custom headers")

         request = SearchScraperRequest(
             user_prompt=user_prompt,
+            num_results=num_results,
             headers=headers,
             output_schema=output_schema,
         )
diff --git a/scrapegraph-py/scrapegraph_py/client.py b/scrapegraph-py/scrapegraph_py/client.py
index d4a7108..72363e7 100644
--- a/scrapegraph-py/scrapegraph_py/client.py
+++ b/scrapegraph-py/scrapegraph_py/client.py
@@ -259,17 +259,29 @@ def get_credits(self):
     def searchscraper(
         self,
         user_prompt: str,
+        num_results: Optional[int] = 3,
         headers: Optional[dict[str, str]] = None,
         output_schema: Optional[BaseModel] = None,
     ):
-        """Send a searchscraper request"""
+        """Send a searchscraper request
+
+        Args:
+            user_prompt: The search prompt string
+            num_results: Number of websites to scrape (3-20). Default is 3.
+                More websites provide better research depth but cost more credits.
+                Credit calculation: 30 base + 10 per additional website beyond 3.
+            headers: Optional headers to send with the request
+            output_schema: Optional schema to structure the output
+        """
         logger.info("šŸ” Starting searchscraper request")
         logger.debug(f"šŸ“ Prompt: {user_prompt}")
+        logger.debug(f"🌐 Number of results: {num_results}")
         if headers:
             logger.debug("šŸ”§ Using custom headers")

         request = SearchScraperRequest(
             user_prompt=user_prompt,
+            num_results=num_results,
             headers=headers,
             output_schema=output_schema,
         )
diff --git a/scrapegraph-py/scrapegraph_py/models/searchscraper.py b/scrapegraph-py/scrapegraph_py/models/searchscraper.py
index 997d407..16da931 100644
--- a/scrapegraph-py/scrapegraph_py/models/searchscraper.py
+++ b/scrapegraph-py/scrapegraph_py/models/searchscraper.py
@@ -8,6 +8,13 @@

 class SearchScraperRequest(BaseModel):
     user_prompt: str = Field(..., example="What is the latest version of Python?")
+    num_results: Optional[int] = Field(
+        default=3,
+        ge=3,
+        le=20,
+        example=5,
+        description="Number of websites to scrape (3-20). Default is 3. More websites provide better research depth but cost more credits."
+    )
     headers: Optional[dict[str, str]] = Field(
         None,
         example={