Skip to content
This repository was archived by the owner on Jul 3, 2023. It is now read-only.

Commit 715d662

Browse files
author
proxycrawl
committed
Adds support to ScreenshotsAPI and small improvements
1 parent 294cc80 commit 715d662

File tree

8 files changed

+201
-7
lines changed

8 files changed

+201
-7
lines changed

LICENSE.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
The MIT License (MIT)
22

3-
Copyright (c) 2020 ProxyCrawl
3+
Copyright (c) 2021 ProxyCrawl
44

55
Permission is hereby granted, free of charge, to any person obtaining a copy
66
of this software and associated documentation files (the "Software"), to deal

README.md

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,57 @@ end
174174

175175
If you have questions or need help using the library, please open an issue or [contact us](https://proxycrawl.com/contact).
176176

177+
178+
## Screenshots API usage
179+
180+
Initialize with your Screenshots API token and call the `get` method.
181+
182+
```ruby
183+
screenshots_api = ProxyCrawl::ScreenshotsAPI.new(token: 'YOUR_TOKEN')
184+
185+
begin
186+
response = screenshots_api.get('https://www.apple.com')
187+
puts response.status_code
188+
puts response.screenshot_path # do something with screenshot_path here
189+
rescue => exception
190+
puts exception.backtrace
191+
end
192+
```
193+
194+
or using a block
195+
196+
```ruby
197+
screenshots_api = ProxyCrawl::ScreenshotsAPI.new(token: 'YOUR_TOKEN')
198+
199+
begin
200+
response = screenshots_api.get('https://www.apple.com') do |file|
201+
# do something (reading/writing) with the image file here
202+
end
203+
puts response.status_code
204+
rescue => exception
205+
puts exception.backtrace
206+
end
207+
```
208+
209+
or specifying a file path
210+
211+
```ruby
212+
screenshots_api = ProxyCrawl::ScreenshotsAPI.new(token: 'YOUR_TOKEN')
213+
214+
begin
215+
response = screenshots_api.get('https://www.apple.com', save_to_path: '~/screenshot.jpg') do |file|
216+
# do something (reading/writing) with the image file here
217+
end
218+
puts response.status_code
219+
rescue => exception
220+
puts exception.backtrace
221+
end
222+
```
223+
224+
Note that the `screenshots_api.get(url, options)` method accepts an [options](https://proxycrawl.com/docs/screenshots-api/parameters) hash as its second argument.
225+
226+
If you have questions or need help using the library, please open an issue or [contact us](https://proxycrawl.com/contact).
227+
177228
## Development
178229

179230
After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.

lib/proxycrawl.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
require 'proxycrawl/api'
55
require 'proxycrawl/scraper_api'
66
require 'proxycrawl/leads_api'
7+
require 'proxycrawl/screenshots_api'
78

89
module ProxyCrawl
910
end

lib/proxycrawl/api.rb

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,15 +70,19 @@ def prepare_uri(url, options)
7070

7171
# Copies the interesting parts of an HTTP response into instance variables.
#
# When +format+ is 'json', or when this client's base_url contains
# '/scraper', the metadata (original_status, pc_status, url) is read from the
# JSON-decoded body. Otherwise it is read from the response itself via
# response[...] (presumably HTTP header lookup; the response class is not
# visible here).
#
# @param response [#body, #code, #[]] the HTTP response to unpack
# @param format [String, nil] requested response format
# @return [String, nil] the raw response body (also stored in @body)
# @raise [JSON::ParserError] if the JSON path is taken and the body is not JSON
def prepare_response(response, format)
  metadata =
    if format == 'json' || base_url.include?('/scraper')
      JSON.parse(response.body)
    else
      response
    end

  @original_status = metadata['original_status'].to_i
  @pc_status = metadata['pc_status'].to_i
  @url = metadata['url']
  @status_code = response.code.to_i
  @body = response.body
end
8387
end
8488
end

lib/proxycrawl/scraper_api.rb

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,20 @@
22

33
module ProxyCrawl
44
class ScraperAPI < ProxyCrawl::API
5+
attr_reader :remaining_requests
56

67
# POST is not supported by the Scraper API; this always raises.
#
# Any arguments are accepted and ignored so that a caller who really attempts
# a POST (passing a URL, data, options, ...) gets the explanatory error below
# instead of an unrelated ArgumentError about the argument count.
#
# @raise [RuntimeError] always
def post(*)
  raise 'Only GET is allowed for the ScraperAPI'
end
910

1011
private
1112

13+
# Runs the parent implementation, then additionally records the
# 'remaining_requests' value found in the JSON response body.
#
# @param response [#body] HTTP response whose body is a JSON document
# @param format [String, nil] passed through unchanged to the parent
# @return [Integer] the remaining request count (also stored in @remaining_requests)
# @raise [JSON::ParserError] if the body is not valid JSON
def prepare_response(response, format)
  super
  @remaining_requests = JSON.parse(response.body)['remaining_requests'].to_i
end
18+
1219
# @return [String] the Scraper API endpoint URL
def base_url
  'https://api.proxycrawl.com/scraper'
end

lib/proxycrawl/screenshots_api.rb

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# frozen_string_literal: true
2+
3+
require 'securerandom'
4+
require 'tmpdir'
5+
6+
module ProxyCrawl
  # Client for the ProxyCrawl Screenshots API.
  #
  # A GET request fetches a screenshot and stores the response body in a JPEG
  # file, either at a caller-supplied path or at a generated path inside the
  # system temp directory.
  class ScreenshotsAPI < ProxyCrawl::API
    attr_reader :screenshot_path, :success, :remaining_requests, :screenshot_url

    INVALID_SAVE_TO_PATH_FILENAME = 'Filename must end with .jpg or .jpeg'
    # Anchored with \z rather than $: `$` also matches before a line break, so
    # a value such as "shot.jpg\nanything" would otherwise pass validation.
    SAVE_TO_PATH_FILENAME_PATTERN = /.+\.(jpg|JPG|jpeg|JPEG)\z/.freeze

    # POST is not supported by the Screenshots API; this always raises.
    #
    # Arguments are accepted and ignored so that a caller attempting a real
    # POST gets the explanatory error instead of an argument-count error.
    #
    # @raise [RuntimeError] always
    def post(*)
      raise 'Only GET is allowed for the ScreenshotsAPI'
    end

    # Requests a screenshot of +url+ and writes the response body to a file.
    #
    # @param url [String] address of the page to capture
    # @param options [Hash] request options passed on to the parent +get+.
    #   The +:save_to_path+ key is consumed here and not forwarded: it names
    #   the destination file and must end in .jpg or .jpeg. A leading "~" and
    #   relative paths are expanded to an absolute path. The caller's hash is
    #   left unmodified.
    # @yield [file] the open File, after the body has been written and
    #   flushed. The position is at the end of the written data, so rewind
    #   before reading. The file is closed when this method returns.
    # @return [Object] whatever the parent +get+ returns
    # @raise [RuntimeError] if the destination does not end in .jpg or .jpeg
    def get(url, options = {})
      request_options = options.dup
      requested_path = request_options.delete(:save_to_path)
      target_path = requested_path ? File.expand_path(requested_path) : generate_file_path
      raise INVALID_SAVE_TO_PATH_FILENAME unless target_path =~ SAVE_TO_PATH_FILENAME_PATTERN

      response = super(url, request_options)
      # Binary mode: the image bytes must reach the disk without any newline or
      # encoding conversion.
      file = File.open(target_path, 'wb+')
      # The UTF-8 retagging is kept so that `body` carries the same encoding
      # callers observed before; in binary mode it does not alter the bytes.
      file.write(response.body&.force_encoding('UTF-8'))
      # Make the data visible to a block that reads the file through its path.
      file.flush
      @screenshot_path = target_path
      yield(file) if block_given?
      response
    ensure
      file&.close
    end

    private

    # Extends the parent implementation with the screenshot-specific values
    # read from the response via response[...] (presumably HTTP headers; the
    # response class is not visible in this file).
    def prepare_response(response, format)
      super(response, format)
      @remaining_requests = response['remaining_requests'].to_i
      @success = response['success'] == 'true'
      @screenshot_url = response['screenshot_url']
    end

    # @return [String] the Screenshots API endpoint URL
    def base_url
      'https://api.proxycrawl.com/screenshots'
    end

    # @return [String] a random, URL-safe file name with a .jpg extension
    def generate_file_name
      "#{SecureRandom.urlsafe_base64}.jpg"
    end

    # @return [String] a generated file path inside the system temp directory
    def generate_file_path
      File.join(Dir.tmpdir, generate_file_name)
    end
  end
end

spec/api_spec.rb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22
require 'proxycrawl'
33

44
describe ProxyCrawl::API do
5-
it "raises an error if token is missing" do
5+
it 'raises an error if token is missing' do
66
expect { ProxyCrawl::API.new }.to raise_error(RuntimeError, 'Token is required')
77
end
88

9-
it "sets/reads token" do
9+
it 'sets/reads token' do
1010
expect(ProxyCrawl::API.new(token: 'test').token).to eql('test')
1111
end
1212

@@ -20,7 +20,7 @@
2020

2121
api = ProxyCrawl::API.new(token: 'test')
2222

23-
response = api.get("http://httpbin.org/anything?param1=x&params2=y")
23+
response = api.get('http://httpbin.org/anything?param1=x&params2=y')
2424

2525
expect(response.status_code).to eql(200)
2626
expect(response.original_status).to eql(200)
@@ -70,4 +70,4 @@
7070
end
7171
end
7272

73-
end
73+
end

spec/screenshots_api_spec.rb

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
require 'spec_helper.rb'
2+
require 'proxycrawl'
3+
4+
describe ProxyCrawl::ScreenshotsAPI do
  # Page requested in every example; the stub in '#get' matches its URL-encoded form.
  let(:target_url) { 'http://httpbin.org/anything?param1=x&params2=y' }
  # Fixed destination used by the examples that pass :save_to_path.
  let(:save_to_path) { File.join(Dir.tmpdir, 'test-image.jpg') }
  let(:api) { ProxyCrawl::ScreenshotsAPI.new(token: 'test') }

  it 'raises an error if token is missing' do
    expect { ProxyCrawl::ScreenshotsAPI.new }.to raise_error(RuntimeError, 'Token is required')
  end

  it 'sets/reads token' do
    expect(api.token).to eql('test')
  end

  describe '#get' do
    before(:each) do
      stub_request(:get, 'https://api.proxycrawl.com/screenshots?token=test&url=http%3A%2F%2Fhttpbin.org%2Fanything%3Fparam1%3Dx%26params2%3Dy')
        .to_return(
          body: 'body',
          status: 200,
          headers: { skip_normalize: true, 'original_status' => 200, 'pc_status' => 200, 'url' => target_url }
        )
    end

    # Remove the screenshot files written by the examples so repeated runs do
    # not accumulate files in the temp directory.
    after(:each) do
      [save_to_path, @generated_path].compact.each do |path|
        File.delete(path) if File.exist?(path)
      end
    end

    # Assertions shared by every example that receives the stubbed response.
    def expect_stubbed_response(response)
      expect(response.status_code).to eql(200)
      expect(response.original_status).to eql(200)
      expect(response.pc_status).to eql(200)
      expect(response.url).to eql(target_url)
      expect(response.body).to eql('body')
    end

    it 'sends a GET request to the ProxyCrawl Screenshots API' do
      response = api.get(target_url)
      @generated_path = response.screenshot_path

      expect_stubbed_response(response)
      expect(response.screenshot_path).not_to be_empty
    end

    it 'accepts a valid save_to_path option' do
      response = api.get(target_url, save_to_path: save_to_path)

      expect_stubbed_response(response)
      expect(response.screenshot_path).to eql(save_to_path)
    end

    it 'rejects an invalid save_to_path option' do
      invalid_paths = ['~/images/image_filename.png', 'image_filename.png', '~/images/image_filename', 'image_filename']

      invalid_paths.each do |invalid_path|
        expect { api.get(target_url, save_to_path: invalid_path) }
          .to raise_error(RuntimeError, 'Filename must end with .jpg or .jpeg')
      end
    end

    it 'accepts a block' do
      response = api.get(target_url, save_to_path: save_to_path) do |file|
        expect(file).to be_kind_of(File)
        expect(file.path).to eql(save_to_path)
      end

      expect_stubbed_response(response)
      expect(response.screenshot_path).to eql(save_to_path)
    end
  end
end

0 commit comments

Comments
 (0)