Skip to content

Commit 1f881d7

Browse files
committed
Merge branch 'tasos-r7-feature/web_crawler_skip_paths'
2 parents 4074a12 + 4eca6e5 commit 1f881d7

File tree

1 file changed

+47
-20
lines changed

1 file changed

+47
-20
lines changed

modules/auxiliary/scanner/http/crawler.rb

Lines changed: 47 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ def initialize
2121
'License' => MSF_LICENSE
2222
)
2323

24+
register_advanced_options([
25+
OptString.new('ExcludePathPatterns', [false, 'Newline-separated list of path patterns to ignore (\'*\' is a wildcard)']),
26+
])
2427
@for_each_page_blocks = []
2528
end
2629

@@ -31,6 +34,17 @@ def focus_crawl(page)
3134
end
3235
=end
3336

37+
# Overrides Msf::Auxiliary::HttpCrawler#get_link_filter to add
# datastore['ExcludePathPatterns'].
#
# When the option is unset or empty, the inherited filter is returned
# untouched. Otherwise the inherited filter is combined with one regexp per
# exclusion pattern, so a link is filtered when it matches either the
# inherited filter or any of the user-supplied patterns.
#
# @return [Regexp] filter matching links that should be skipped
def get_link_filter
  excluded = datastore['ExcludePathPatterns'].to_s
  return super if excluded.empty?

  # Regexp.union embeds every regexp together with its own options, so any
  # flags set on the inherited filter (e.g. case-insensitivity) are kept.
  # Joining the bare #source strings would silently discard them.
  Regexp.union(super, *opt_patterns_to_regexps(excluded))
end
47+
3448
def run
3549
super
3650

@@ -163,31 +177,34 @@ def crawler_process_page(t, page, cnt)
163177
end
164178
end
165179

166-
form = {}.merge!(form_template)
167-
form[:method] = (f['method'] || 'GET').upcase
168-
form[:query] = target.query.to_s if form[:method] != "GET"
169-
form[:path] = target.path
170-
form[:params] = []
171-
f.css('input', 'textarea').each do |inp|
172-
form[:params] << [inp['name'].to_s, inp['value'] || inp.content || '', { :type => inp['type'].to_s }]
173-
end
180+
# skip this form if it matches exclusion criteria
181+
if !(target.to_s =~ get_link_filter)
182+
form = {}.merge!(form_template)
183+
form[:method] = (f['method'] || 'GET').upcase
184+
form[:query] = target.query.to_s if form[:method] != "GET"
185+
form[:path] = target.path
186+
form[:params] = []
187+
f.css('input', 'textarea').each do |inp|
188+
form[:params] << [inp['name'].to_s, inp['value'] || inp.content || '', { :type => inp['type'].to_s }]
189+
end
174190

175-
f.css( 'select' ).each do |s|
176-
value = nil
191+
f.css( 'select' ).each do |s|
192+
value = nil
177193

178-
# iterate over each option to find the default value (if there is a selected one)
179-
s.children.each do |opt|
180-
ov = opt['value'] || opt.content
181-
value = ov if opt['selected']
182-
end
194+
# iterate over each option to find the default value (if there is a selected one)
195+
s.children.each do |opt|
196+
ov = opt['value'] || opt.content
197+
value = ov if opt['selected']
198+
end
183199

184-
# set the first one as the default value if we don't already have one
185-
value ||= s.children.first['value'] || s.children.first.content rescue ''
200+
# set the first one as the default value if we don't already have one
201+
value ||= s.children.first['value'] || s.children.first.content rescue ''
186202

187-
form[:params] << [ s['name'].to_s, value.to_s, [ :type => 'select'] ]
188-
end
203+
form[:params] << [ s['name'].to_s, value.to_s, [ :type => 'select'] ]
204+
end
189205

190-
forms << form
206+
forms << form
207+
end
191208
end
192209
end
193210

@@ -252,4 +269,14 @@ def form_from_url( website, url )
252269
form[:method] ? form : nil
253270
end
254271

272+
private
273+
# Converts a newline-separated list of path patterns into anchored regexps.
#
# Each non-blank line becomes one Regexp. A '*' in a pattern acts as a
# wildcard (translated to '.*'); every other character is matched literally.
# Surrounding whitespace on a line is ignored and blank lines are skipped.
#
# @param patterns [String, nil] newline-separated patterns
# @return [Array<Regexp>] one regexp per pattern, in input order
def opt_patterns_to_regexps(patterns)
  patterns.to_s.split(/[\r\n]+/).map(&:strip).reject(&:empty?).map do |pattern|
    # Split on the wildcard first and escape only the literal pieces. The
    # -1 limit keeps trailing empty pieces so a pattern ending in '*' still
    # ends in '.*'.
    body = pattern.split('*', -1).map { |literal| Regexp.escape(literal) }.join('.*')
    Regexp.new("^#{body}$")
  end
end
280+
281+
255282
end

0 commit comments

Comments
 (0)