Skip to content

Commit 0e1bafb

Browse files
committed
Land rapid7#8902, vendored robots gem
2 parents a0131f4 + b2fc0e5 commit 0e1bafb

File tree

4 files changed

+162
-8
lines changed

4 files changed

+162
-8
lines changed

Dockerfile

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,6 @@ RUN apk update && \
4242
&& apk del .ruby-builddeps \
4343
&& rm -rf /var/cache/apk/*
4444

45-
# fix for robots gem not readable (known bug)
46-
# https://github.com/rapid7/metasploit-framework/issues/6068
47-
RUN chmod o+r /usr/local/bundle/gems/robots-*/lib/robots.rb
48-
4945
RUN adduser -g msfconsole -D $MSF_USER
5046

5147
RUN /usr/sbin/setcap cap_net_raw,cap_net_bind_service=+eip $(which ruby)

Gemfile.lock

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@ PATH
5858
rex-struct2
5959
rex-text
6060
rex-zip
61-
robots
6261
ruby_smb
6362
rubyntlm
6463
rubyzip
@@ -271,7 +270,6 @@ GEM
271270
rex-zip (0.1.3)
272271
rex-text
273272
rkelly-remix (0.0.7)
274-
robots (0.10.1)
275273
rspec (3.6.0)
276274
rspec-core (~> 3.6.0)
277275
rspec-expectations (~> 3.6.0)

lib/robots.rb

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
#
2+
# Copyright (c) 2008 Kyle Maxwell, contributors
3+
#
4+
# Permission is hereby granted, free of charge, to any person
5+
# obtaining a copy of this software and associated documentation
6+
# files (the "Software"), to deal in the Software without
7+
# restriction, including without limitation the rights to use,
8+
# copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
# copies of the Software, and to permit persons to whom the
10+
# Software is furnished to do so, subject to the following
11+
# conditions:
12+
#
13+
# The above copyright notice and this permission notice shall be
14+
# included in all copies or substantial portions of the Software.
15+
#
16+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
18+
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
20+
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
21+
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23+
# OTHER DEALINGS IN THE SOFTWARE.
24+
#
25+
26+
require "open-uri"
27+
require "uri"
28+
require "timeout"
29+
require 'rex/logging/log_dispatcher'
30+
31+
# https://github.com/fizx/robots
32+
# Minimal robots.txt client.
#
# A Robots instance is bound to one user agent. It fetches robots.txt once
# per host, caches the parsed result, and answers whether a given URI may be
# requested by that user agent.
class Robots
  # Seconds to wait for robots.txt when no timeout has been configured.
  DEFAULT_TIMEOUT = 3

  # Represents a parsed robots.txt file
  class ParsedRobots
    # Rule set used when robots.txt is missing or unusable: allow everything.
    ALLOW_ALL = "User-agent: *\nAllow: /\n".freeze

    # A pattern that can never match: an empty negative lookahead always
    # fails. Used for empty rule values such as a bare "Disallow:".
    NEVER_MATCH = /(?!)/

    # Fetches and parses robots.txt for the host of +uri+.
    #
    # @param uri [URI, String] any URI on the target host
    # @param user_agent [String] sent as the User-Agent header of the fetch
    def initialize(uri, user_agent)
      @last_accessed = Time.at(1)
      @other = {}
      @disallows = {}
      @allows = {}
      @delays = {}

      io = Robots.get_robots_txt(uri, user_agent)
      usable = io && io.content_type == "text/plain" && io.status.first == "200"
      unless usable
        # Release the rejected response before falling back to allow-all.
        io.close if io.respond_to?(:close)
        io = StringIO.new(ALLOW_ALL)
      end

      begin
        parse(io)
      ensure
        # The response may be backed by a temp file; do not leak it.
        io.close if io.respond_to?(:close)
      end

      @parsed = true
    end

    # Decides whether +uri+ may be requested by +user_agent+.
    #
    # A path is refused when any Disallow rule of a matching user-agent group
    # matches it, unless an Allow rule of a matching group matches as well.
    # When the request is allowed and a Crawl-delay applies, this call blocks
    # until the delay since the previous allowed request has elapsed.
    #
    # @param uri [URI] must respond to +request_uri+
    # @param user_agent [String]
    # @return [Boolean]
    def allowed?(uri, user_agent)
      return true unless @parsed

      path = uri.request_uri
      allowed = !rule_matches?(@disallows, user_agent, path)
      allowed ||= rule_matches?(@allows, user_agent, path)

      throttle(user_agent) if allowed
      allowed
    end

    # @return [Hash{String => Array<String>}] directives other than
    #   User-agent, Allow, Disallow and Crawl-delay, keyed by lower-cased name
    def other_values
      @other
    end

    protected

    # Converts a robots.txt value into an anchored Regexp; "*" is a wildcard.
    #
    # @param pattern [String]
    # @return [Regexp]
    def to_regex(pattern)
      return NEVER_MATCH if pattern.strip.empty?

      escaped = Regexp.escape(pattern).gsub(Regexp.escape("*"), ".*")
      # \A (not ^) so the rule only matches at the very start of the string,
      # even if the subject contains newlines.
      Regexp.compile("\\A#{escaped}")
    end

    private

    # Reads +io+ line by line and fills the rule tables.
    def parse(io)
      agent = /.*/
      io.each do |line|
        next if line =~ /^\s*(#.*|$)/

        # Split on the first colon only so values keep their own colons.
        key, value = line.split(":", 2)
        key = key.to_s.strip.downcase
        value = value.to_s.strip

        case key
        when "user-agent"
          agent = to_regex(value)
        when "allow"
          (@allows[agent] ||= []) << to_regex(value)
        when "disallow"
          (@disallows[agent] ||= []) << to_regex(value)
        when "crawl-delay"
          @delays[agent] = value.to_i
        else
          (@other[key] ||= []) << value
        end
      end
    end

    # True when any group matching +user_agent+ has a rule matching +path+.
    def rule_matches?(rules_by_agent, user_agent, path)
      rules_by_agent.any? do |agent, rules|
        (user_agent =~ agent) && rules.any? { |rule| path =~ rule }
      end
    end

    # Sleeps for whatever remains of the crawl delay, then records the access.
    # NOTE(review): the delay comes from the remote robots.txt and is not
    # capped here.
    def throttle(user_agent)
      delay = crawl_delay_for(user_agent)
      return unless delay

      remaining = delay - (Time.now - @last_accessed)
      # Kernel#sleep rejects negative intervals, so only wait when needed.
      sleep(remaining) if remaining > 0
      @last_accessed = Time.now
    end

    # Largest Crawl-delay among the groups matching +user_agent+, or nil.
    # Delays are stored under the group's Regexp, so they must be matched
    # against the user agent rather than looked up by string.
    def crawl_delay_for(user_agent)
      @delays.select { |agent, _delay| user_agent =~ agent }.values.max
    end
  end

  # Downloads /robots.txt from the host of +uri+.
  #
  # @param uri [URI, String]
  # @param user_agent [String] sent as the User-Agent header
  # @return [IO, nil] the open-uri response, or nil when the request fails
  #   or exceeds Robots.timeout
  def self.get_robots_txt(uri, user_agent)
    Timeout.timeout(Robots.timeout) do
      begin
        URI.join(uri.to_s, "/robots.txt").open("User-Agent" => user_agent)
      rescue StandardError
        nil
      end
    end
  rescue Timeout::Error
    # dlog is presumably supplied by rex/logging/log_dispatcher.
    dlog("robots.txt request timed out")
    # Return nil explicitly so callers never see the logger's return value.
    nil
  end

  # Instance-level writer kept for backward compatibility; the fetch timeout
  # itself is read from the class (see Robots.timeout).
  attr_writer :timeout

  # Sets the fetch timeout, in seconds, used by Robots.get_robots_txt.
  def self.timeout=(value)
    @timeout = value
  end

  # @return [Numeric] the configured fetch timeout, or DEFAULT_TIMEOUT
  def self.timeout
    @timeout || DEFAULT_TIMEOUT
  end

  # @param user_agent [String] user agent to fetch and evaluate rules as
  def initialize(user_agent)
    @user_agent = user_agent
    @parsed = {}
  end

  # @param uri [URI, String]
  # @return [Boolean] whether robots.txt on the URI's host permits the request
  def allowed?(uri)
    uri = coerce_uri(uri)
    parsed_robots_for(uri).allowed?(uri, @user_agent)
  end

  # @param uri [URI, String]
  # @return [Hash{String => Array<String>}] non-rule directives of the host's
  #   robots.txt
  def other_values(uri)
    parsed_robots_for(coerce_uri(uri)).other_values
  end

  private

  # Accepts either a URI object or anything whose to_s is a URI string.
  def coerce_uri(uri)
    uri.is_a?(URI) ? uri : URI.parse(uri.to_s)
  end

  # Returns the cached ParsedRobots for the URI's host, fetching on first use.
  def parsed_robots_for(uri)
    @parsed[uri.host] ||= ParsedRobots.new(uri, @user_agent)
  end
end

metasploit-framework.gemspec

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,8 +171,6 @@ Gem::Specification.new do |spec|
171171
spec.add_runtime_dependency 'rex-exploitation'
172172
# Command line editing, history, and tab completion in msfconsole
173173
spec.add_runtime_dependency 'rb-readline'
174-
# Needed by anemone crawler
175-
spec.add_runtime_dependency 'robots'
176174
# Needed by some modules
177175
spec.add_runtime_dependency 'rubyzip'
178176
# Needed for some post modules

0 commit comments

Comments
 (0)