This repository was archived by the owner on Jul 3, 2023. It is now read-only.

Commit 94ff440

proxycrawl authored and committed

Adds support to StorageAPI and small improvements

1 parent d407a4b

File tree

7 files changed: +431, -17 lines changed

README.md

Lines changed: 116 additions & 0 deletions

@@ -149,6 +149,7 @@ Example:
 ```ruby
 begin
   response = scraper_api.get('https://www.amazon.com/Halo-SleepSack-Swaddle-Triangle-Neutral/dp/B01LAG1TOS')
+  puts response.remaining_requests
   puts response.status_code
   puts response.body
 rescue => exception
@@ -160,11 +161,15 @@ end
 
 Initialize with your Leads API token and call the `get` method.
 
+For more details on the implementation, please visit the [Leads API documentation](https://proxycrawl.com/docs/leads-api).
+
 ```ruby
 leads_api = ProxyCrawl::LeadsAPI.new(token: 'YOUR_TOKEN')
 
 begin
   response = leads_api.get('stripe.com')
+  puts response.success
+  puts response.remaining_requests
   puts response.status_code
   puts response.body
 rescue => exception
@@ -184,6 +189,8 @@ screenshots_api = ProxyCrawl::ScreenshotsAPI.new(token: 'YOUR_TOKEN')
 
 begin
   response = screenshots_api.get('https://www.apple.com')
+  puts response.success
+  puts response.remaining_requests
   puts response.status_code
   puts response.screenshot_path # do something with screenshot_path here
 rescue => exception
@@ -200,6 +207,8 @@ begin
   response = screenshots_api.get('https://www.apple.com') do |file|
     # do something (reading/writing) with the image file here
   end
+  puts response.success
+  puts response.remaining_requests
   puts response.status_code
 rescue => exception
   puts exception.backtrace
@@ -215,6 +224,8 @@ begin
   response = screenshots_api.get('https://www.apple.com', save_to_path: '~/screenshot.jpg') do |file|
     # do something (reading/writing) with the image file here
   end
+  puts response.success
+  puts response.remaining_requests
   puts response.status_code
 rescue => exception
   puts exception.backtrace
@@ -223,6 +234,111 @@ end
 
 Note that the `screenshots_api.get(url, options)` method accepts an [options](https://proxycrawl.com/docs/screenshots-api/parameters) hash.
 
+## Storage API usage
+
+Initialize the Storage API using your private token.
+
+```ruby
+storage_api = ProxyCrawl::StorageAPI.new(token: 'YOUR_TOKEN')
+```
+
+Pass the [url](https://proxycrawl.com/docs/storage-api/parameters/#url) that you want to get from [Proxycrawl Storage](https://proxycrawl.com/dashboard/storage).
+
+```ruby
+begin
+  response = storage_api.get('https://www.apple.com')
+  puts response.original_status
+  puts response.pc_status
+  puts response.url
+  puts response.status_code
+  puts response.rid
+  puts response.body
+  puts response.stored_at
+rescue => exception
+  puts exception.backtrace
+end
+```
+
+Or you can use the [RID](https://proxycrawl.com/docs/storage-api/parameters/#rid):
+
+```ruby
+begin
+  response = storage_api.get(RID)
+  puts response.original_status
+  puts response.pc_status
+  puts response.url
+  puts response.status_code
+  puts response.rid
+  puts response.body
+  puts response.stored_at
+rescue => exception
+  puts exception.backtrace
+end
+```
+
+Note: either the URL or the RID must be sent; both parameters are optional, but one of the two is required.
+
+### [Delete](https://proxycrawl.com/docs/storage-api/delete/) request
+
+To delete a storage item from your storage area, use the correct RID:
+
+```ruby
+if storage_api.delete(RID)
+  puts 'delete success'
+else
+  puts "Unable to delete: #{storage_api.body['error']}"
+end
+```
+
+### [Bulk](https://proxycrawl.com/docs/storage-api/bulk/) request
+
+To do a bulk request with a list of RIDs, send the RIDs as an array:
+
+```ruby
+begin
+  response = storage_api.bulk([RID1, RID2, RID3, ...])
+  puts response.original_status
+  puts response.pc_status
+  puts response.url
+  puts response.status_code
+  puts response.rid
+  puts response.body
+  puts response.stored_at
+rescue => exception
+  puts exception.backtrace
+end
+```
+
+### [RIDs](https://proxycrawl.com/docs/storage-api/rids) request
+
+To request a bulk list of RIDs from your storage area:
+
+```ruby
+begin
+  response = storage_api.rids
+  puts response.status_code
+  puts response.rid
+  puts response.body
+rescue => exception
+  puts exception.backtrace
+end
+```
+
+You can also specify a limit as a parameter:
+
+```ruby
+storage_api.rids(100)
+```
+
+### [Total Count](https://proxycrawl.com/docs/storage-api/total_count)
+
+To get the total number of documents in your storage area:
+
+```ruby
+total_count = storage_api.total_count
+puts "total_count: #{total_count}"
+```
+
 If you have questions or need help using the library, please open an issue or [contact us](https://proxycrawl.com/contact).
 
 ## Development
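The README note above links to the Screenshots API options without showing a call that uses them. A minimal sketch, assuming the options hash is simply forwarded as extra query parameters the same way `save_to_path` is; the `device` key is only an illustrative placeholder, so check the linked parameters page for the real option names:

```ruby
require 'proxycrawl'

screenshots_api = ProxyCrawl::ScreenshotsAPI.new(token: 'YOUR_TOKEN')

# The `device` option below is a placeholder; see
# https://proxycrawl.com/docs/screenshots-api/parameters for supported keys.
response = screenshots_api.get('https://www.apple.com', device: 'mobile')
puts response.status_code
puts response.screenshot_path
```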

lib/proxycrawl.rb

Lines changed: 1 addition & 0 deletions

@@ -5,6 +5,7 @@
 require 'proxycrawl/scraper_api'
 require 'proxycrawl/leads_api'
 require 'proxycrawl/screenshots_api'
+require 'proxycrawl/storage_api'
 
 module ProxyCrawl
 end
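With the extra require in place, a single `require 'proxycrawl'` loads the new storage client alongside the existing APIs; a minimal smoke-test sketch (the token value is a placeholder):

```ruby
require 'proxycrawl'

# ProxyCrawl::StorageAPI is now available without requiring the file directly.
storage_api = ProxyCrawl::StorageAPI.new(token: 'YOUR_TOKEN')
puts storage_api.class # => ProxyCrawl::StorageAPI
```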

lib/proxycrawl/api.rb

Lines changed: 7 additions & 13 deletions

@@ -6,7 +6,7 @@
 
 module ProxyCrawl
   class API
-    attr_reader :token, :body, :status_code, :original_status, :pc_status, :url
+    attr_reader :token, :body, :status_code, :original_status, :pc_status, :url, :storage_url
 
     INVALID_TOKEN = 'Token is required'
     INVALID_URL = 'URL is required'
@@ -69,19 +69,13 @@ def prepare_uri(url, options)
     end
 
     def prepare_response(response, format)
-      if format == 'json' || base_url.include?('/scraper')
-        json_body = JSON.parse(response.body)
-        @original_status = json_body['original_status'].to_i
-        @pc_status = json_body['pc_status'].to_i
-        @url = json_body['url']
-        @status_code = response.code.to_i
-      else
-        @original_status = response['original_status'].to_i
-        @status_code = response.code.to_i
-        @pc_status = response['pc_status'].to_i
-        @url = response['url']
-      end
+      res = format == 'json' || base_url.include?('/scraper') ? JSON.parse(response.body) : response
 
+      @original_status = res['original_status'].to_i
+      @pc_status = res['pc_status'].to_i
+      @url = res['url']
+      @storage_url = res['storage_url']
+      @status_code = response.code.to_i
       @body = response.body
     end
   end
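The consolidated `prepare_response` now also reads `storage_url` from the parsed response, so every `ProxyCrawl::API` response exposes it (it is simply `nil` when the field is absent). A rough sketch of reading it, under the assumption that the Crawling API's `store` parameter asks ProxyCrawl to keep a copy of the page in Storage and report where it was stored:

```ruby
require 'proxycrawl'

api = ProxyCrawl::API.new(token: 'YOUR_TOKEN')

# Assumption: `store: 'true'` is the Crawling API parameter for saving the
# crawled page into ProxyCrawl Storage; #storage_url stays nil otherwise.
response = api.get('https://www.example.com', store: 'true')
puts response.status_code
puts response.storage_url
```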

lib/proxycrawl/leads_api.rb

Lines changed: 10 additions & 3 deletions

@@ -6,13 +6,13 @@
 
 module ProxyCrawl
   class LeadsAPI
-    attr_reader :token, :body, :status_code
+    attr_reader :token, :body, :status_code, :success, :remaining_requests
 
     INVALID_TOKEN = 'Token is required'
     INVALID_DOMAIN = 'Domain is required'
 
     def initialize(options = {})
-      raise INVALID_TOKEN if options[:token].nil?
+      raise INVALID_TOKEN if options[:token].nil? || options[:token].empty?
 
       @token = options[:token]
     end
@@ -24,11 +24,18 @@ def get(domain)
       uri.query = URI.encode_www_form({ token: token, domain: domain })
 
       response = Net::HTTP.get_response(uri)
-
       @status_code = response.code.to_i
       @body = response.body
 
+      json_body = JSON.parse(response.body)
+      @success = json_body['success']
+      @remaining_requests = json_body['remaining_requests'].to_i
+
       self
     end
+
+    def post
+      raise 'Only GET is allowed for the LeadsAPI'
+    end
   end
 end
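A short usage sketch of the LeadsAPI changes above: the stricter token check, the two new readers, and the guard against non-GET calls (token and domain values are placeholders):

```ruby
require 'proxycrawl'

# An empty token is now rejected just like a missing one.
begin
  ProxyCrawl::LeadsAPI.new(token: '')
rescue RuntimeError => e
  puts e.message # => "Token is required"
end

leads_api = ProxyCrawl::LeadsAPI.new(token: 'YOUR_TOKEN')

# get now also parses the JSON payload into #success and #remaining_requests.
response = leads_api.get('stripe.com')
puts response.success
puts response.remaining_requests

# Any POST attempt fails fast with a descriptive error.
begin
  leads_api.post
rescue RuntimeError => e
  puts e.message # => "Only GET is allowed for the LeadsAPI"
end
```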

lib/proxycrawl/storage_api.rb

Lines changed: 116 additions & 0 deletions

@@ -0,0 +1,116 @@
+# frozen_string_literal: true
+
+require 'net/http'
+require 'json'
+require 'uri'
+
+module ProxyCrawl
+  class StorageAPI
+    attr_reader :token, :original_status, :pc_status, :url, :status_code, :rid, :body, :stored_at
+
+    INVALID_TOKEN = 'Token is required'
+    INVALID_RID = 'RID is required'
+    INVALID_RID_ARRAY = 'One or more RIDs are required'
+    INVALID_URL_OR_RID = 'Either URL or RID is required'
+    BASE_URL = 'https://api.proxycrawl.com/storage'
+
+    def initialize(options = {})
+      raise INVALID_TOKEN if options[:token].nil? || options[:token].empty?
+
+      @token = options[:token]
+    end
+
+    def get(url_or_rid, format = 'html')
+      raise INVALID_URL_OR_RID if url_or_rid.nil? || url_or_rid.empty?
+
+      uri = URI(BASE_URL)
+      uri.query = URI.encode_www_form({ token: token, format: format }.merge(decide_url_or_rid(url_or_rid)))
+      response = Net::HTTP.get_response(uri)
+
+      res = format == 'json' ? JSON.parse(response.body) : response
+
+      @original_status = res['original_status'].to_i
+      @pc_status = res['pc_status'].to_i
+      @url = res['url']
+      @rid = res['rid']
+      @stored_at = res['stored_at']
+
+      @status_code = response.code.to_i
+      @body = response.body
+
+      self
+    end
+
+    def delete(rid)
+      raise INVALID_RID if rid.nil? || rid.empty?
+
+      uri = URI(BASE_URL)
+      uri.query = URI.encode_www_form(token: token, rid: rid)
+      http = Net::HTTP.new(uri.host)
+      request = Net::HTTP::Delete.new(uri.request_uri)
+      response = http.request(request)
+
+      @url, @original_status, @pc_status, @stored_at = nil
+      @status_code = response.code.to_i
+      @rid = rid
+      @body = JSON.parse(response.body)
+
+      @body.key?('success')
+    end
+
+    def bulk(rids_array = [])
+      raise INVALID_RID_ARRAY if rids_array.empty?
+
+      uri = URI("#{BASE_URL}/bulk")
+      uri.query = URI.encode_www_form(token: token)
+      http = Net::HTTP.new(uri.host)
+      request = Net::HTTP::Post.new(uri.request_uri, { 'Content-Type': 'application/json' })
+      request.body = { rids: rids_array }.to_json
+      response = http.request(request)
+
+      @body = JSON.parse(response.body)
+      @original_status = @body.map { |item| item['original_status'].to_i }
+      @status_code = response.code.to_i
+      @pc_status = @body.map { |item| item['pc_status'].to_i }
+      @url = @body.map { |item| item['url'] }
+      @rid = @body.map { |item| item['rid'] }
+      @stored_at = @body.map { |item| item['stored_at'] }
+
+      self
+    end
+
+    def rids(limit = -1)
+      uri = URI("#{BASE_URL}/rids")
+      query_hash = { token: token }
+      query_hash.merge!({ limit: limit }) if limit >= 0
+      uri.query = URI.encode_www_form(query_hash)
+
+      response = Net::HTTP.get_response(uri)
+      @url, @original_status, @pc_status, @stored_at = nil
+      @status_code = response.code.to_i
+      @body = JSON.parse(response.body)
+      @rid = @body
+
+      @body
+    end
+
+    def total_count
+      uri = URI("#{BASE_URL}/total_count")
+      uri.query = URI.encode_www_form(token: token)
+
+      response = Net::HTTP.get_response(uri)
+      @url, @original_status, @pc_status, @stored_at = nil
+      @status_code = response.code.to_i
+      @rid = rid
+      @body = JSON.parse(response.body)
+
+      body['totalCount']
+    end
+
+    private
+
+    def decide_url_or_rid(url_or_rid)
+      %r{^https?://} =~ url_or_rid ? { url: url_or_rid } : { rid: url_or_rid }
+    end
+  end
+end
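Because `bulk` maps the JSON payload into parallel arrays (one entry per returned document), the per-document fields can be recombined after the call; a minimal sketch, with placeholder RIDs:

```ruby
require 'proxycrawl'

storage_api = ProxyCrawl::StorageAPI.new(token: 'YOUR_TOKEN')

# 'RID1' and 'RID2' stand in for real RIDs from your storage area.
response = storage_api.bulk(%w[RID1 RID2])

# #rid, #url and #stored_at each hold one entry per document, in order.
response.rid.zip(response.url, response.stored_at).each do |rid, url, stored_at|
  puts "#{rid}  #{url}  stored_at=#{stored_at}"
end
```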

spec/screenshots_api_spec.rb

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-require 'spec_helper.rb'
+require 'spec_helper'
 require 'proxycrawl'
 
 describe ProxyCrawl::ScreenshotsAPI do
