This repository was archived by the owner on Jul 3, 2023. It is now read-only.

Commit 94ff440

proxycrawl authored and committed

Adds support to StorageAPI and small improvements

1 parent d407a4b

File tree

7 files changed: +431, -17 lines changed

README.md

Lines changed: 116 additions & 0 deletions

@@ -149,6 +149,7 @@ Example:
 ```ruby
 begin
   response = scraper_api.get('https://www.amazon.com/Halo-SleepSack-Swaddle-Triangle-Neutral/dp/B01LAG1TOS')
+  puts response.remaining_requests
   puts response.status_code
   puts response.body
 rescue => exception
@@ -160,11 +161,15 @@ end
 
 Initialize with your Leads API token and call the `get` method.
 
+For more details on the implementation, please visit the [Leads API documentation](https://proxycrawl.com/docs/leads-api).
+
 ```ruby
 leads_api = ProxyCrawl::LeadsAPI.new(token: 'YOUR_TOKEN')
 
 begin
   response = leads_api.get('stripe.com')
+  puts response.success
+  puts response.remaining_requests
   puts response.status_code
   puts response.body
 rescue => exception
@@ -184,6 +189,8 @@ screenshots_api = ProxyCrawl::ScreenshotsAPI.new(token: 'YOUR_TOKEN')
 
 begin
   response = screenshots_api.get('https://www.apple.com')
+  puts response.success
+  puts response.remaining_requests
   puts response.status_code
   puts response.screenshot_path # do something with screenshot_path here
 rescue => exception
@@ -200,6 +207,8 @@ begin
   response = screenshots_api.get('https://www.apple.com') do |file|
     # do something (reading/writing) with the image file here
   end
+  puts response.success
+  puts response.remaining_requests
   puts response.status_code
 rescue => exception
   puts exception.backtrace
@@ -215,6 +224,8 @@ begin
   response = screenshots_api.get('https://www.apple.com', save_to_path: '~/screenshot.jpg') do |file|
     # do something (reading/writing) with the image file here
   end
+  puts response.success
+  puts response.remaining_requests
   puts response.status_code
 rescue => exception
   puts exception.backtrace
@@ -223,6 +234,111 @@ end
 
 Note that the `screenshots_api.get(url, options)` method accepts an [options](https://proxycrawl.com/docs/screenshots-api/parameters) hash.
 
+## Storage API usage
+
+Initialize the Storage API using your private token.
+
+```ruby
+storage_api = ProxyCrawl::StorageAPI.new(token: 'YOUR_TOKEN')
+```
+
+Pass the [url](https://proxycrawl.com/docs/storage-api/parameters/#url) that you want to get from [Proxycrawl Storage](https://proxycrawl.com/dashboard/storage).
+
+```ruby
+begin
+  response = storage_api.get('https://www.apple.com')
+  puts response.original_status
+  puts response.pc_status
+  puts response.url
+  puts response.status_code
+  puts response.rid
+  puts response.body
+  puts response.stored_at
+rescue => exception
+  puts exception.backtrace
+end
+```
+
+Or you can use the [RID](https://proxycrawl.com/docs/storage-api/parameters/#rid):
+
+```ruby
+begin
+  response = storage_api.get(RID)
+  puts response.original_status
+  puts response.pc_status
+  puts response.url
+  puts response.status_code
+  puts response.rid
+  puts response.body
+  puts response.stored_at
+rescue => exception
+  puts exception.backtrace
+end
+```
+
+Note: either the URL or the RID must be sent; both parameters are optional, but one of the two is required.
+
+### [Delete](https://proxycrawl.com/docs/storage-api/delete/) request
+
+To delete a storage item from your storage area, use the correct RID:
+
+```ruby
+if storage_api.delete(RID)
+  puts 'delete success'
+else
+  puts "Unable to delete: #{storage_api.body['error']}"
+end
+```
+
+### [Bulk](https://proxycrawl.com/docs/storage-api/bulk/) request
+
+To do a bulk request with a list of RIDs, send the RIDs as an array:
+
+```ruby
+begin
+  response = storage_api.bulk([RID1, RID2, RID3, ...])
+  puts response.original_status
+  puts response.pc_status
+  puts response.url
+  puts response.status_code
+  puts response.rid
+  puts response.body
+  puts response.stored_at
+rescue => exception
+  puts exception.backtrace
+end
+```
+
+### [RIDs](https://proxycrawl.com/docs/storage-api/rids) request
+
+To request a bulk list of RIDs from your storage area:
+
+```ruby
+begin
+  response = storage_api.rids
+  puts response.status_code
+  puts response.rid
+  puts response.body
+rescue => exception
+  puts exception.backtrace
+end
+```
+
+You can also specify a limit as a parameter:
+
+```ruby
+storage_api.rids(100)
+```
+
+### [Total Count](https://proxycrawl.com/docs/storage-api/total_count)
+
+To get the total number of documents in your storage area:
+
+```ruby
+total_count = storage_api.total_count
+puts "total_count: #{total_count}"
+```
+
 If you have questions or need help using the library, please open an issue or [contact us](https://proxycrawl.com/contact).
 
 ## Development
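The README note above links to the Screenshots API options without showing a call that uses them. A minimal sketch, assuming the options hash is simply forwarded as extra query parameters the same way `save_to_path` is; the `device` key is only an illustrative placeholder, so check the linked parameters page for the real option names:

```ruby
require 'proxycrawl'

screenshots_api = ProxyCrawl::ScreenshotsAPI.new(token: 'YOUR_TOKEN')

# The `device` option below is a placeholder; see
# https://proxycrawl.com/docs/screenshots-api/parameters for supported keys.
response = screenshots_api.get('https://www.apple.com', device: 'mobile')
puts response.status_code
puts response.screenshot_path
```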

lib/proxycrawl.rb

Lines changed: 1 addition & 0 deletions

@@ -5,6 +5,7 @@
 require 'proxycrawl/scraper_api'
 require 'proxycrawl/leads_api'
 require 'proxycrawl/screenshots_api'
+require 'proxycrawl/storage_api'
 
 module ProxyCrawl
 end
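With the extra require in place, a single `require 'proxycrawl'` loads the new storage client alongside the existing APIs; a minimal smoke-test sketch (the token value is a placeholder):

```ruby
require 'proxycrawl'

# ProxyCrawl::StorageAPI is now available without requiring the file directly.
storage_api = ProxyCrawl::StorageAPI.new(token: 'YOUR_TOKEN')
puts storage_api.class # => ProxyCrawl::StorageAPI
```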

lib/proxycrawl/api.rb

Lines changed: 7 additions & 13 deletions

@@ -6,7 +6,7 @@
 
 module ProxyCrawl
   class API
-    attr_reader :token, :body, :status_code, :original_status, :pc_status, :url
+    attr_reader :token, :body, :status_code, :original_status, :pc_status, :url, :storage_url
 
     INVALID_TOKEN = 'Token is required'
     INVALID_URL = 'URL is required'
@@ -69,19 +69,13 @@ def prepare_uri(url, options)
     end
 
     def prepare_response(response, format)
-      if format == 'json' || base_url.include?('/scraper')
-        json_body = JSON.parse(response.body)
-        @original_status = json_body['original_status'].to_i
-        @pc_status = json_body['pc_status'].to_i
-        @url = json_body['url']
-        @status_code = response.code.to_i
-      else
-        @original_status = response['original_status'].to_i
-        @status_code = response.code.to_i
-        @pc_status = response['pc_status'].to_i
-        @url = response['url']
-      end
+      res = format == 'json' || base_url.include?('/scraper') ? JSON.parse(response.body) : response
 
+      @original_status = res['original_status'].to_i
+      @pc_status = res['pc_status'].to_i
+      @url = res['url']
+      @storage_url = res['storage_url']
+      @status_code = response.code.to_i
       @body = response.body
     end
   end
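The consolidated `prepare_response` now also reads `storage_url` from the parsed response, so every `ProxyCrawl::API` response exposes it (it is simply `nil` when the field is absent). A rough sketch of reading it, under the assumption that the Crawling API's `store` parameter asks ProxyCrawl to keep a copy of the page in Storage and report where it was stored:

```ruby
require 'proxycrawl'

api = ProxyCrawl::API.new(token: 'YOUR_TOKEN')

# Assumption: `store: 'true'` is the Crawling API parameter for saving the
# crawled page into ProxyCrawl Storage; #storage_url stays nil otherwise.
response = api.get('https://www.example.com', store: 'true')
puts response.status_code
puts response.storage_url
```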

lib/proxycrawl/leads_api.rb

Lines changed: 10 additions & 3 deletions

@@ -6,13 +6,13 @@
 
 module ProxyCrawl
   class LeadsAPI
-    attr_reader :token, :body, :status_code
+    attr_reader :token, :body, :status_code, :success, :remaining_requests
 
     INVALID_TOKEN = 'Token is required'
     INVALID_DOMAIN = 'Domain is required'
 
     def initialize(options = {})
-      raise INVALID_TOKEN if options[:token].nil?
+      raise INVALID_TOKEN if options[:token].nil? || options[:token].empty?
 
       @token = options[:token]
     end
@@ -24,11 +24,18 @@ def get(domain)
       uri.query = URI.encode_www_form({ token: token, domain: domain })
 
       response = Net::HTTP.get_response(uri)
-
       @status_code = response.code.to_i
       @body = response.body
 
+      json_body = JSON.parse(response.body)
+      @success = json_body['success']
+      @remaining_requests = json_body['remaining_requests'].to_i
+
       self
     end
+
+    def post
+      raise 'Only GET is allowed for the LeadsAPI'
+    end
   end
 end
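A short usage sketch of the LeadsAPI changes above: the stricter token check, the two new readers, and the guard against non-GET calls (token and domain values are placeholders):

```ruby
require 'proxycrawl'

# An empty token is now rejected just like a missing one.
begin
  ProxyCrawl::LeadsAPI.new(token: '')
rescue RuntimeError => e
  puts e.message # => "Token is required"
end

leads_api = ProxyCrawl::LeadsAPI.new(token: 'YOUR_TOKEN')

# get now also parses the JSON payload into #success and #remaining_requests.
response = leads_api.get('stripe.com')
puts response.success
puts response.remaining_requests

# Any POST attempt fails fast with a descriptive error.
begin
  leads_api.post
rescue RuntimeError => e
  puts e.message # => "Only GET is allowed for the LeadsAPI"
end
```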

lib/proxycrawl/storage_api.rb

Lines changed: 116 additions & 0 deletions

@@ -0,0 +1,116 @@
+# frozen_string_literal: true
+
+require 'net/http'
+require 'json'
+require 'uri'
+
+module ProxyCrawl
+  class StorageAPI
+    attr_reader :token, :original_status, :pc_status, :url, :status_code, :rid, :body, :stored_at
+
+    INVALID_TOKEN = 'Token is required'
+    INVALID_RID = 'RID is required'
+    INVALID_RID_ARRAY = 'One or more RIDs are required'
+    INVALID_URL_OR_RID = 'Either URL or RID is required'
+    BASE_URL = 'https://api.proxycrawl.com/storage'
+
+    def initialize(options = {})
+      raise INVALID_TOKEN if options[:token].nil? || options[:token].empty?
+
+      @token = options[:token]
+    end
+
+    def get(url_or_rid, format = 'html')
+      raise INVALID_URL_OR_RID if url_or_rid.nil? || url_or_rid.empty?
+
+      uri = URI(BASE_URL)
+      uri.query = URI.encode_www_form({ token: token, format: format }.merge(decide_url_or_rid(url_or_rid)))
+      response = Net::HTTP.get_response(uri)
+
+      res = format == 'json' ? JSON.parse(response.body) : response
+
+      @original_status = res['original_status'].to_i
+      @pc_status = res['pc_status'].to_i
+      @url = res['url']
+      @rid = res['rid']
+      @stored_at = res['stored_at']
+
+      @status_code = response.code.to_i
+      @body = response.body
+
+      self
+    end
+
+    def delete(rid)
+      raise INVALID_RID if rid.nil? || rid.empty?
+
+      uri = URI(BASE_URL)
+      uri.query = URI.encode_www_form(token: token, rid: rid)
+      http = Net::HTTP.new(uri.host)
+      request = Net::HTTP::Delete.new(uri.request_uri)
+      response = http.request(request)
+
+      @url, @original_status, @pc_status, @stored_at = nil
+      @status_code = response.code.to_i
+      @rid = rid
+      @body = JSON.parse(response.body)
+
+      @body.key?('success')
+    end
+
+    def bulk(rids_array = [])
+      raise INVALID_RID_ARRAY if rids_array.empty?
+
+      uri = URI("#{BASE_URL}/bulk")
+      uri.query = URI.encode_www_form(token: token)
+      http = Net::HTTP.new(uri.host)
+      request = Net::HTTP::Post.new(uri.request_uri, { 'Content-Type': 'application/json' })
+      request.body = { rids: rids_array }.to_json
+      response = http.request(request)
+
+      @body = JSON.parse(response.body)
+      @original_status = @body.map { |item| item['original_status'].to_i }
+      @status_code = response.code.to_i
+      @pc_status = @body.map { |item| item['pc_status'].to_i }
+      @url = @body.map { |item| item['url'] }
+      @rid = @body.map { |item| item['rid'] }
+      @stored_at = @body.map { |item| item['stored_at'] }
+
+      self
+    end
+
+    def rids(limit = -1)
+      uri = URI("#{BASE_URL}/rids")
+      query_hash = { token: token }
+      query_hash.merge!({ limit: limit }) if limit >= 0
+      uri.query = URI.encode_www_form(query_hash)
+
+      response = Net::HTTP.get_response(uri)
+      @url, @original_status, @pc_status, @stored_at = nil
+      @status_code = response.code.to_i
+      @body = JSON.parse(response.body)
+      @rid = @body
+
+      @body
+    end
+
+    def total_count
+      uri = URI("#{BASE_URL}/total_count")
+      uri.query = URI.encode_www_form(token: token)
+
+      response = Net::HTTP.get_response(uri)
+      @url, @original_status, @pc_status, @stored_at = nil
+      @status_code = response.code.to_i
+      @rid = rid
+      @body = JSON.parse(response.body)
+
+      body['totalCount']
+    end
+
+    private
+
+    def decide_url_or_rid(url_or_rid)
+      %r{^https?://} =~ url_or_rid ? { url: url_or_rid } : { rid: url_or_rid }
+    end
+  end
+end
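Because `bulk` maps the JSON payload into parallel arrays (one entry per returned document), the per-document fields can be recombined after the call; a minimal sketch, with placeholder RIDs:

```ruby
require 'proxycrawl'

storage_api = ProxyCrawl::StorageAPI.new(token: 'YOUR_TOKEN')

# 'RID1' and 'RID2' stand in for real RIDs from your storage area.
response = storage_api.bulk(%w[RID1 RID2])

# #rid, #url and #stored_at each hold one entry per document, in order.
response.rid.zip(response.url, response.stored_at).each do |rid, url, stored_at|
  puts "#{rid}  #{url}  stored_at=#{stored_at}"
end
```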

spec/screenshots_api_spec.rb

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-require 'spec_helper.rb'
+require 'spec_helper'
 require 'proxycrawl'
 
 describe ProxyCrawl::ScreenshotsAPI do
