Merge pull request #2 from jaywad/support-scraper-leads

crawlbase · web-flow · commit e0406ec8b6af · 2020-10-13T13:54:43.000+03:00
Support ScraperAPI and LeadsAPI
diff --git a/README.md b/README.md
@@ -18,7 +18,7 @@ Or install it yourself as:
 
     $ gem install proxycrawl
 
-## Usage
+## Crawling API Usage
 
 Require the gem in your project
 
@@ -130,6 +130,48 @@ puts response.original_status
 puts response.pc_status
 ```
 
+## Scraper API Usage
+
+Initialize the Scraper API using your normal token and call the `get` method.
+
+```ruby
+scraper_api = ProxyCrawl::ScraperAPI.new(token: 'YOUR_TOKEN')
+```
+
+Pass the url that you want to scrape plus any options from the ones available in the [Scraper API documentation](https://proxycrawl.com/docs/scraper-api/parameters).
+
+```ruby
+scraper_api.get(url, options)
+```
+
+Example:
+
+```ruby
+begin
+  response = scraper_api.get('https://www.amazon.com/Halo-SleepSack-Swaddle-Triangle-Neutral/dp/B01LAG1TOS')
+  puts response.status_code
+  puts response.body
+rescue => exception
+  puts exception.backtrace
+end
+```
+
+## Leads API Usage
+
+Initialize with your Leads API token and call the `get` method.
+
+```ruby
+leads_api = ProxyCrawl::LeadsAPI.new(token: 'YOUR_TOKEN')
+
+begin
+  response = leads_api.get('stripe.com')
+  puts response.status_code
+  puts response.body
+rescue => exception
+  puts exception.backtrace
+end
+```
+
 If you have questions or need help using the library, please open an issue or [contact us](https://proxycrawl.com/contact).
 
 ## Development
diff --git a/lib/proxycrawl.rb b/lib/proxycrawl.rb
@@ -1,5 +1,9 @@
-require "proxycrawl/version"
+# frozen_string_literal: true
+
+require 'proxycrawl/version'
 require 'proxycrawl/api'
+require 'proxycrawl/scraper_api'
+require 'proxycrawl/leads_api'
 
 module ProxyCrawl
 end
diff --git a/lib/proxycrawl/api.rb b/lib/proxycrawl/api.rb
@@ -1,4 +1,5 @@
 # frozen_string_literal: true
+
 require 'net/http'
 require 'json'
 require 'uri'
@@ -7,8 +8,6 @@ module ProxyCrawl
   class API
     attr_reader :token, :body, :status_code, :original_status, :pc_status, :url
 
-    BASE_URL = 'https://api.proxycrawl.com'
-
     INVALID_TOKEN = 'Token is required'
     INVALID_URL = 'URL is required'
 
@@ -58,15 +57,19 @@ def post(url, data, options = {})
 
     private
 
+    def base_url
+      'https://api.proxycrawl.com'
+    end
+
     def prepare_uri(url, options)
-      uri = URI(BASE_URL)
+      uri = URI(base_url)
       uri.query = URI.encode_www_form({ token: @token, url: url }.merge(options))
 
       uri
     end
 
     def prepare_response(response, format)
-      if format == 'json'
+      if format == 'json' || base_url.include?('/scraper')
         @status_code = response.code.to_i
         @body = response.body
       else
@@ -78,4 +81,4 @@ def prepare_response(response, format)
       end
     end
   end
-end
+end
diff --git a/lib/proxycrawl/leads_api.rb b/lib/proxycrawl/leads_api.rb
@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+
+require 'net/http'
+require 'json'
+require 'uri'
+
+module ProxyCrawl
+  class LeadsAPI
+    attr_reader :token, :body, :status_code
+
+    INVALID_TOKEN = 'Token is required'
+    INVALID_DOMAIN = 'Domain is required'
+
+    def initialize(options = {})
+      raise INVALID_TOKEN if options[:token].nil?
+
+      @token = options[:token]
+    end
+
+    def get(domain)
+      raise INVALID_DOMAIN if domain.empty?
+
+      uri = URI('https://api.proxycrawl.com/leads')
+      uri.query = URI.encode_www_form({ token: token, domain: domain })
+
+      response = Net::HTTP.get_response(uri)
+
+      @status_code = response.code.to_i
+      @body = response.body
+
+      self
+    end
+  end
+end
diff --git a/lib/proxycrawl/scraper_api.rb b/lib/proxycrawl/scraper_api.rb
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+module ProxyCrawl
+  class ScraperAPI < ProxyCrawl::API
+
+    def post
+      raise 'Only GET is allowed for the ScraperAPI'
+    end
+
+    private
+
+    def base_url
+      'https://api.proxycrawl.com/scraper'
+    end
+  end
+end
diff --git a/lib/proxycrawl/version.rb b/lib/proxycrawl/version.rb
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 module ProxyCrawl
-  VERSION = "0.2.0"
+  VERSION = '0.2.1'
 end