Skip to content

Commit 19477db

Browse files
author
Tod Beardsley
committed
Land rapid7#3537 from @pagedegeek, msfcrawler fix
2 parents 9fb18f1 + cd08acd commit 19477db

File tree

9 files changed

+76
-141
lines changed

9 files changed

+76
-141
lines changed

Gemfile

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@ gem 'robots'
2121
# Needed for some post modules
2222
gem 'sqlite3'
2323

24-
gem 'hpricot', :require => 'hpricot'
25-
2624
group :db do
2725
# Needed for Msf::DbManager
2826
gem 'activerecord', '>= 3.0.0', '< 4.0.0'

Gemfile.lock

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ GEM
2121
factory_girl (4.2.0)
2222
activesupport (>= 3.0.0)
2323
fivemat (1.2.1)
24-
hpricot (0.8.6)
2524
i18n (0.6.5)
2625
json (1.8.0)
2726
metasploit_data_models (0.17.0)
@@ -84,7 +83,6 @@ DEPENDENCIES
8483
database_cleaner
8584
factory_girl (>= 4.1.0)
8685
fivemat (= 1.2.1)
87-
hpricot
8886
json
8987
metasploit_data_models (= 0.17.0)
9088
meterpreter_bins (= 0.0.6)

data/msfcrawler/basic.rb

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
require 'rubygems'
1515
require 'pathname'
16-
require 'hpricot'
16+
require 'nokogiri'
1717
require 'uri'
1818

1919
class CrawlerSimple < BaseParser
@@ -24,23 +24,20 @@ def parse(request,result)
2424
return
2525
end
2626

27-
doc = Hpricot(result.body.to_s)
28-
doc.search('a').each do |link|
29-
30-
hr = link.attributes['href']
31-
32-
if hr and !hr.match(/^(\#|javascript\:)/)
33-
begin
34-
hreq = urltohash('GET',hr,request['uri'],nil)
35-
36-
insertnewpath(hreq)
37-
38-
rescue URI::InvalidURIError
39-
#puts "Parse error"
40-
#puts "Error: #{link[0]}"
27+
# doc = Hpricot(result.body.to_s)
28+
doc = Nokogiri::HTML(result.body.to_s)
29+
doc.css('a').each do |anchor_tag|
30+
hr = anchor_tag['href']
31+
if hr && !hr.match(/^(\#|javascript\:)/)
32+
begin
33+
hreq = urltohash('GET', hr, request['uri'], nil)
34+
insertnewpath(hreq)
35+
rescue URI::InvalidURIError
36+
#puts "Parse error"
37+
#puts "Error: #{link[0]}"
38+
end
4139
end
4240
end
43-
end
4441
end
4542
end
4643

data/msfcrawler/forms.rb

Lines changed: 11 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
require 'rubygems'
1515
require 'pathname'
16-
require 'hpricot'
16+
require 'nokogiri'
1717
require 'uri'
1818

1919
class CrawlerForms < BaseParser
@@ -27,49 +27,30 @@ def parse(request,result)
2727
hr = ''
2828
m = ''
2929

30-
doc = Hpricot(result.body.to_s)
31-
doc.search('form').each do |f|
32-
hr = f.attributes['action']
30+
doc = Nokogiri::HTML(result.body.to_s)
31+
doc.css('form').each do |f|
32+
hr = f['action']
3333

34-
fname = f.attributes['name']
35-
if fname.empty?
36-
fname = "NONE"
37-
end
38-
39-
m = "GET"
40-
if !f.attributes['method'].empty?
41-
m = f.attributes['method'].upcase
42-
end
34+
fname = f['name']
35+
fname = "NONE" if fname.empty?
4336

44-
#puts "Parsing form name: #{fname} (#{m})"
37+
m = f['method'].empty? ? 'GET' : f['method'].upcase
4538

46-
htmlform = Hpricot(f.inner_html)
39+
htmlform = Nokogiri::HTML(f.inner_html)
4740

4841
arrdata = []
4942

50-
htmlform.search('input').each do |p|
51-
#puts p.attributes['name']
52-
#puts p.attributes['type']
53-
#puts p.attributes['value']
54-
55-
#raw_request has uri_encoding disabled as it encodes '='.
56-
arrdata << (p.attributes['name'] + "=" + Rex::Text.uri_encode(p.attributes['value']))
43+
htmlform.css('input').each do |p|
44+
arrdata << "#{p['name']}=#{Rex::Text.uri_encode(p['value'])}"
5745
end
5846

5947
data = arrdata.join("&").to_s
6048

61-
6249
begin
63-
hreq = urltohash(m,hr,request['uri'],data)
64-
50+
hreq = urltohash(m, hr, request['uri'], data)
6551
hreq['ctype'] = 'application/x-www-form-urlencoded'
66-
6752
insertnewpath(hreq)
68-
69-
7053
rescue URI::InvalidURIError
71-
#puts "Parse error"
72-
#puts "Error: #{link[0]}"
7354
end
7455
end
7556
end

data/msfcrawler/frames.rb

Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -9,33 +9,29 @@
99

1010
require 'rubygems'
1111
require 'pathname'
12-
require 'hpricot'
12+
require 'nokogiri'
1313
require 'uri'
1414

1515
class CrawlerFrames < BaseParser
1616

1717
def parse(request,result)
1818

19-
if !result['Content-Type'].include? "text/html"
20-
return
21-
end
22-
23-
doc = Hpricot(result.body.to_s)
24-
doc.search('iframe').each do |ifra|
25-
26-
ir = ifra.attributes['src']
27-
28-
if ir and !ir.match(/^(\#|javascript\:)/)
29-
begin
30-
hreq = urltohash('GET',ir,request['uri'],nil)
19+
return unless result['Content-Type'].include?('text/html')
3120

32-
insertnewpath(hreq)
21+
doc = Nokogiri::HTML(result.body.to_s)
22+
doc.css('iframe').each do |ifra|
23+
ir = ifra['src']
3324

34-
rescue URI::InvalidURIError
35-
#puts "Error"
25+
if ir && !ir.match(/^(\#|javascript\:)/)
26+
begin
27+
hreq = urltohash('GET', ir, request['uri'], nil)
28+
insertnewpath(hreq)
29+
rescue URI::InvalidURIError
30+
end
3631
end
37-
end
32+
3833
end
3934
end
35+
4036
end
4137

data/msfcrawler/image.rb

Lines changed: 13 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -10,33 +10,26 @@
1010

1111
require 'rubygems'
1212
require 'pathname'
13-
require 'hpricot'
13+
require 'nokogiri'
1414
require 'uri'
1515

1616
class CrawlerImage < BaseParser
1717

1818
def parse(request,result)
1919

20-
if !result['Content-Type'].include? "text/html"
21-
return
22-
end
23-
24-
doc = Hpricot(result.body.to_s)
25-
doc.search('img').each do |i|
26-
27-
im = i.attributes['src']
28-
29-
if im and !im.match(/^(\#|javascript\:)/)
30-
begin
31-
hreq = urltohash('GET',im,request['uri'],nil)
32-
33-
insertnewpath(hreq)
34-
35-
rescue URI::InvalidURIError
36-
#puts "Parse error"
37-
#puts "Error: #{i[0]}"
20+
return unless result['Content-Type'].include?('text/html')
21+
22+
doc = Nokogiri::HTML(result.body.to_s)
23+
doc.css('img').each do |i|
24+
im = i['src']
25+
if im && !im.match(/^(\#|javascript\:)/)
26+
begin
27+
hreq = urltohash('GET', im, request['uri'], nil)
28+
insertnewpath(hreq)
29+
rescue URI::InvalidURIError
30+
end
3831
end
39-
end
32+
4033
end
4134
end
4235
end

data/msfcrawler/link.rb

Lines changed: 13 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -10,33 +10,25 @@
1010

1111
require 'rubygems'
1212
require 'pathname'
13-
require 'hpricot'
13+
require 'nokogiri'
1414
require 'uri'
1515

1616
class CrawlerLink < BaseParser
1717

1818
def parse(request,result)
19-
20-
if !result['Content-Type'].include? "text/html"
21-
return
22-
end
23-
24-
doc = Hpricot(result.body.to_s)
25-
doc.search('link').each do |link|
26-
27-
hr = link.attributes['href']
28-
29-
if hr and !hr.match(/^(\#|javascript\:)/)
30-
begin
31-
hreq = urltohash('GET',hr,request['uri'],nil)
32-
33-
insertnewpath(hreq)
34-
35-
rescue URI::InvalidURIError
36-
#puts "Parse error"
37-
#puts "Error: #{link[0]}"
19+
return unless result['Content-Type'].include?('text/html')
20+
21+
doc = Nokogiri::HTML(result.body.to_s)
22+
doc.css('link').each do |link|
23+
hr = link['href']
24+
if hr && !hr.match(/^(\#|javascript\:)/)
25+
begin
26+
hreq = urltohash('GET', hr, request['uri'], nil)
27+
insertnewpath(hreq)
28+
rescue URI::InvalidURIError
29+
end
3830
end
39-
end
31+
4032
end
4133
end
4234
end

data/msfcrawler/objects.rb

Lines changed: 6 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13,36 +13,25 @@
1313

1414
require 'rubygems'
1515
require 'pathname'
16-
require 'hpricot'
16+
require 'nokogiri'
1717
require 'uri'
1818

1919
class CrawlerObjects < BaseParser
2020

2121
def parse(request,result)
22-
23-
if !result['Content-Type'].include? "text/html"
24-
return
25-
end
26-
22+
return unless result['Content-Type'].include?('text/html') # TODO: use MIXIN
2723
hr = ''
2824
m = ''
29-
30-
doc = Hpricot(result.body.to_s)
31-
doc.search("//object/embed").each do |obj|
32-
25+
doc = Nokogiri::HTML(result.body.to_s)
26+
doc.xpath("//object/embed").each do |obj|
3327
s = obj['src']
34-
3528
begin
36-
hreq = urltohash('GET',s,request['uri'],nil)
37-
29+
hreq = urltohash('GET', s, request['uri'], nil)
3830
insertnewpath(hreq)
39-
40-
4131
rescue URI::InvalidURIError
42-
#puts "Parse error"
43-
#puts "Error: #{link[0]}"
4432
end
4533
end
4634
end
35+
4736
end
4837

data/msfcrawler/scripts.rb

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,36 +13,27 @@
1313

1414
require 'rubygems'
1515
require 'pathname'
16-
require 'hpricot'
16+
require 'nokogiri'
1717
require 'uri'
1818

1919
class CrawlerScripts < BaseParser
2020

2121
def parse(request,result)
22-
23-
if !result['Content-Type'].include? "text/html"
24-
return
25-
end
22+
return unless result['Content-Type'].include? "text/html"
2623

2724
hr = ''
2825
m = ''
29-
30-
doc = Hpricot(result.body.to_s)
31-
doc.search("//script").each do |obj|
32-
26+
doc = Nokogiri::HTML(result.body.to_s)
27+
doc.xpath("//script").each do |obj|
3328
s = obj['src']
34-
3529
begin
36-
hreq = urltohash('GET',s,request['uri'],nil)
37-
30+
hreq = urltohash('GET', s, request['uri'], nil)
3831
insertnewpath(hreq)
39-
40-
4132
rescue URI::InvalidURIError
42-
#puts "Parse error"
43-
#puts "Error: #{link[0]}"
4433
end
4534
end
35+
4636
end
37+
4738
end
4839

0 commit comments

Comments
 (0)