1+ import { axios } from "@pipedream/platform" ;
2+
/**
 * Pipedream app definition for ScrapeGraphAI.
 *
 * Exposes the shared prop definitions and the HTTP helper methods that
 * actions and sources built on this app can reuse. Every request helper
 * takes its inputs from the `opts` argument it is called with; any extra
 * keys in `opts` (for example `$`) are forwarded to `_makeRequest`.
 *
 * NOTE(review): the endpoint paths and the Bearer auth scheme below are kept
 * as originally written — confirm them against the ScrapeGraphAI API docs.
 */
export default {
  type: "app",
  app: "scrapegraphai",
  // NOTE(review): "{ts}" looks like a scaffolding placeholder — confirm that
  // tooling substitutes it before publishing.
  version: "0.0.{ts}",
  propDefinitions: {
    url: {
      type: "string",
      label: "URL to Scrape",
      description: "The URL of the website to scrape.",
    },
    jobId: {
      type: "string",
      label: "Job ID",
      description: "The ID of the scraping job.",
    },
    dataFields: {
      type: "string[]",
      label: "Data Fields",
      description: "Optional data fields to extract from the scraped content.",
      optional: true,
    },
    paginationSettings: {
      type: "string[]",
      label: "Pagination Settings",
      description: "Optional pagination settings for the scraping job.",
      optional: true,
    },
    headers: {
      type: "string[]",
      label: "Headers",
      description: "Optional headers to include in the scraping request.",
      optional: true,
    },
    filterDataFields: {
      type: "string[]",
      label: "Filter Data Fields",
      description: "Optional data fields to filter the results.",
      optional: true,
    },
    taskId: {
      type: "string",
      label: "Task ID",
      description: "The ID of the scraping task to monitor.",
      optional: true,
    },
    scrapingJobFilter: {
      type: "string",
      label: "Scraping Job Filter",
      description: "Filter events by specific scraping jobs.",
      optional: true,
    },
    dataTypeFilter: {
      type: "string",
      label: "Data Type Filter",
      description: "Filter events by specific data types.",
      optional: true,
    },
    scrapingTaskNameFilter: {
      type: "string",
      label: "Scraping Task Name Filter",
      description: "Filter events by specific scraping task names.",
      optional: true,
    },
    errorTypeFilter: {
      type: "string",
      label: "Error Type Filter",
      description: "Filter error events by specific error types.",
      optional: true,
    },
  },
  methods: {
    // this.$auth contains connected account data
    authKeys() {
      console.log(Object.keys(this.$auth));
    },
    /**
     * @returns {string} Base URL every request path is appended to.
     */
    _baseUrl() {
      return "https://api.scrapegraphai.com/v1";
    },
    /**
     * Converts the entries of a `string[]` prop into parsed values.
     * String entries are JSON-parsed; entries that are already non-string
     * values are passed through unchanged.
     *
     * @param {Array<*>} items - Raw prop entries.
     * @returns {Array<*>} Parsed entries, in the same order.
     * @throws {SyntaxError} If a string entry is not valid JSON.
     */
    _parseJsonItems(items) {
      // Explicit arrow: passing JSON.parse directly to map() would hand it
      // the element index as the reviver argument.
      return items.map((item) => (typeof item === "string"
        ? JSON.parse(item)
        : item));
    },
    /**
     * Sends a request through the platform axios wrapper.
     *
     * @param {object} [opts]
     * @param {object} [opts.$=this] - Context passed as the first axios argument.
     * @param {string} [opts.method="GET"] - HTTP method.
     * @param {string} [opts.path="/"] - Path appended to the base URL.
     * @param {object} [opts.headers] - Extra headers; `Authorization` is always
     *   set from the connected account and cannot be overridden here.
     * @returns {Promise<*>} Whatever the axios wrapper resolves to.
     */
    async _makeRequest(opts = {}) {
      const {
        $ = this, method = "GET", path = "/", headers, ...otherOpts
      } = opts;
      return axios($, {
        ...otherOpts,
        method,
        url: this._baseUrl() + path,
        headers: {
          ...headers,
          // No padding inside the template: the value is exactly "Bearer <key>".
          Authorization: `Bearer ${this.$auth.api_key}`,
        },
      });
    },
    /**
     * POSTs a new scraping job to `/smartscraper/start`.
     *
     * @param {object} [opts]
     * @param {string} opts.url - Sent as `url` in the request body.
     * @param {Array<*>} [opts.dataFields] - Parsed and sent as `data_fields`.
     * @param {Array<*>} [opts.paginationSettings] - Parsed and sent as
     *   `pagination_settings`.
     * @param {Array<*>} [opts.headers] - Parsed and sent as `headers` in the
     *   request body (not as HTTP headers of this call).
     * @returns {Promise<*>} Result of `_makeRequest`.
     */
    async startScrapingJob(opts = {}) {
      const {
        url,
        dataFields,
        paginationSettings,
        headers,
        ...otherOpts
      } = opts;
      const data = {
        url,
      };
      if (dataFields) {
        data.data_fields = this._parseJsonItems(dataFields);
      }
      if (paginationSettings) {
        data.pagination_settings = this._parseJsonItems(paginationSettings);
      }
      if (headers) {
        data.headers = this._parseJsonItems(headers);
      }
      return this._makeRequest({
        method: "POST",
        path: "/smartscraper/start",
        data,
        ...otherOpts,
      });
    },
    /**
     * GETs `/smartscraper/get-results` for one job.
     *
     * @param {object} [opts]
     * @param {string} opts.jobId - Sent as the `job_id` query parameter.
     * @param {string[]} [opts.filterDataFields] - Sent as `filter_data_fields`.
     * @returns {Promise<*>} Result of `_makeRequest`.
     */
    async retrieveScrapingResults(opts = {}) {
      const {
        jobId, filterDataFields, ...otherOpts
      } = opts;
      const params = {
        job_id: jobId,
      };
      if (filterDataFields) {
        params.filter_data_fields = filterDataFields;
      }
      return this._makeRequest({
        method: "GET",
        path: "/smartscraper/get-results",
        params,
        ...otherOpts,
      });
    },
    /**
     * POSTs to `/smartscraper/stop` for one job.
     *
     * @param {object} [opts]
     * @param {string} opts.jobId - Sent as `job_id` in the request body.
     * @returns {Promise<*>} Result of `_makeRequest`.
     */
    async stopScrapingJob(opts = {}) {
      const {
        jobId, ...otherOpts
      } = opts;
      return this._makeRequest({
        method: "POST",
        path: "/smartscraper/stop",
        data: {
          job_id: jobId,
        },
        ...otherOpts,
      });
    },
    /**
     * GETs `/events/task-completed`, optionally filtered.
     *
     * @param {object} [opts]
     * @param {string} [opts.taskId] - Sent as `task_id` when truthy.
     * @param {string} [opts.scrapingJobFilter] - Sent as `scraping_job` when truthy.
     * @returns {Promise<*>} Result of `_makeRequest`.
     */
    async onTaskCompleted(opts = {}) {
      const {
        taskId, scrapingJobFilter, ...otherOpts
      } = opts;
      const params = {};
      if (taskId) {
        params.task_id = taskId;
      }
      if (scrapingJobFilter) {
        params.scraping_job = scrapingJobFilter;
      }
      return this._makeRequest({
        method: "GET",
        path: "/events/task-completed",
        params,
        ...otherOpts,
      });
    },
    /**
     * GETs `/events/new-data`, optionally filtered.
     *
     * @param {object} [opts]
     * @param {string} [opts.dataTypeFilter] - Sent as `data_type` when truthy.
     * @param {string} [opts.scrapingTaskNameFilter] - Sent as
     *   `scraping_task_name` when truthy.
     * @returns {Promise<*>} Result of `_makeRequest`.
     */
    async onNewDataAvailable(opts = {}) {
      const {
        dataTypeFilter, scrapingTaskNameFilter, ...otherOpts
      } = opts;
      const params = {};
      if (dataTypeFilter) {
        params.data_type = dataTypeFilter;
      }
      if (scrapingTaskNameFilter) {
        params.scraping_task_name = scrapingTaskNameFilter;
      }
      return this._makeRequest({
        method: "GET",
        path: "/events/new-data",
        params,
        ...otherOpts,
      });
    },
    /**
     * GETs `/events/error`, optionally filtered.
     *
     * @param {object} [opts]
     * @param {string} [opts.errorTypeFilter] - Sent as `error_type` when truthy.
     * @param {string} [opts.scrapingJobFilter] - Sent as `scraping_job` when truthy.
     * @returns {Promise<*>} Result of `_makeRequest`.
     */
    async onErrorOccurred(opts = {}) {
      const {
        errorTypeFilter, scrapingJobFilter, ...otherOpts
      } = opts;
      const params = {};
      if (errorTypeFilter) {
        params.error_type = errorTypeFilter;
      }
      if (scrapingJobFilter) {
        params.scraping_job = scrapingJobFilter;
      }
      return this._makeRequest({
        method: "GET",
        path: "/events/error",
        params,
        ...otherOpts,
      });
    },
    /**
     * Collects `items` from successive pages returned by `fn`.
     *
     * `fn` is called with the merged option objects plus a 1-based `page`
     * number. Paging stops when a response has no `items` or a falsy
     * `has_more`.
     *
     * @param {Function} fn - Page fetcher; invoked without a `this` binding,
     *   so pass an arrow function or a bound method.
     * @param {...object} opts - Option objects merged left to right into
     *   every call to `fn`.
     * @returns {Promise<Array<*>>} All collected items, in page order.
     */
    async paginate(fn, ...opts) {
      // `opts` is a rest array: merge its members rather than spreading the
      // array itself, which would only yield index keys ("0", "1", ...).
      const baseOpts = Object.assign({}, ...opts);
      const results = [];
      let page = 1;
      let hasMore = true;
      while (hasMore) {
        const response = await fn({
          ...baseOpts,
          page,
        });
        const items = (response && response.items) || [];
        if (items.length === 0) {
          break;
        }
        results.push(...items);
        hasMore = Boolean(response.has_more);
        page += 1;
      }
      return results;
    },
  },
};
0 commit comments