Skip to content

Commit 8f464e1

Browse files
committed
Land rapid7#8658, Add Gather PDF Authors auxiliary module
2 parents 9cd254c + afc704a commit 8f464e1

File tree

4 files changed

+288
-0
lines changed

4 files changed

+288
-0
lines changed

Gemfile.lock

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ PATH
3131
packetfu
3232
patch_finder
3333
pcaprub
34+
pdf-reader
3435
pg (= 0.20.0)
3536
railties
3637
rb-readline
@@ -69,6 +70,7 @@ PATH
6970
GEM
7071
remote: https://rubygems.org/
7172
specs:
73+
Ascii85 (1.0.2)
7274
actionpack (4.2.8)
7375
actionview (= 4.2.8)
7476
activesupport (= 4.2.8)
@@ -96,6 +98,7 @@ GEM
9698
tzinfo (~> 1.1)
9799
addressable (2.5.1)
98100
public_suffix (~> 2.0, >= 2.0.2)
101+
afm (0.2.2)
99102
arel (6.0.4)
100103
arel-helpers (2.4.0)
101104
activerecord (>= 3.1.0, < 6)
@@ -166,6 +169,7 @@ GEM
166169
grpc (1.4.0)
167170
google-protobuf (~> 3.1)
168171
googleauth (~> 0.5.1)
172+
hashery (2.1.2)
169173
i18n (0.8.4)
170174
jsobfu (0.4.2)
171175
rkelly-remix
@@ -236,6 +240,12 @@ GEM
236240
pcaprub
237241
patch_finder (1.0.2)
238242
pcaprub (0.12.4)
243+
pdf-reader (2.0.0)
244+
Ascii85 (~> 1.0.0)
245+
afm (~> 0.2.1)
246+
hashery (~> 2.0)
247+
ruby-rc4
248+
ttfunk
239249
pg (0.20.0)
240250
pg_array_parser (0.0.9)
241251
postgres_ext (3.0.0)
@@ -338,6 +348,7 @@ GEM
338348
rspec-rerun (1.1.0)
339349
rspec (~> 3.0)
340350
rspec-support (3.6.0)
351+
ruby-rc4 (0.1.5)
341352
ruby_smb (0.0.18)
342353
bindata
343354
rubyntlm
@@ -365,6 +376,7 @@ GEM
365376
thor (0.19.4)
366377
thread_safe (0.3.6)
367378
timecop (0.9.0)
379+
ttfunk (1.5.1)
368380
tzinfo (1.2.3)
369381
thread_safe (~> 0.1)
370382
tzinfo-data (1.2017.2)
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
This module downloads PDF files and extracts the author's name from the document metadata.
2+
3+
## Verification Steps
4+
5+
1. Start `msfconsole`
6+
2. Do: `use auxiliary/gather/http_pdf_authors`
7+
3. Do: `set URL [URL]`
8+
4. Do: `run`
9+
10+
11+
## Options
12+
13+
**URL**
14+
15+
The URL of a PDF to analyze.
16+
17+
**URL_LIST**
18+
19+
File containing a list of PDF URLs to analyze.
20+
21+
**OUTFILE**
22+
23+
File to store extracted author names.
24+
25+
26+
## Scenarios
27+
28+
### URL
29+
30+
```
31+
msf auxiliary(http_pdf_authors) > set url http://127.0.0.1/test4.pdf
32+
url => http://127.0.0.1/test4.pdf
33+
msf auxiliary(http_pdf_authors) > run
34+
35+
[*] Processing 1 URLs...
36+
[*] Downloading 'http://127.0.0.1/test4.pdf'
37+
[*] HTTP 200 -- Downloaded PDF (38867 bytes)
38+
[+] PDF Author: Administrator
39+
[*] 100.00% done (1/1 files)
40+
41+
[+] Found 1 authors: Administrator
42+
[*] Auxiliary module execution completed
43+
```
44+
45+
### URL_LIST with OUTFILE
46+
47+
```
48+
msf auxiliary(http_pdf_authors) > set outfile /root/output
49+
outfile => /root/output
50+
msf auxiliary(http_pdf_authors) > set url_list /root/urls
51+
url_list => /root/urls
52+
msf auxiliary(http_pdf_authors) > run
53+
54+
[*] Processing 8 URLs...
55+
[*] Downloading 'http://127.0.0.1:80/test.pdf'
56+
[*] HTTP 200 -- Downloaded PDF (89283 bytes)
57+
[*] 12.50% done (1/8 files)
58+
[*] Downloading 'http://127.0.0.1/test2.pdf'
59+
[*] HTTP 200 -- Downloaded PDF (636661 bytes)
60+
[+] PDF Author: sqlmap developers
61+
[*] 25.00% done (2/8 files)
62+
[*] Downloading 'http://127.0.0.1/test3.pdf'
63+
[*] HTTP 200 -- Downloaded PDF (167478 bytes)
64+
[+] PDF Author: Evil1
65+
[*] 37.50% done (3/8 files)
66+
[*] Downloading 'http://127.0.0.1/test4.pdf'
67+
[*] HTTP 200 -- Downloaded PDF (38867 bytes)
68+
[+] PDF Author: Administrator
69+
[*] 50.00% done (4/8 files)
70+
[*] Downloading 'http://127.0.0.1/test5.pdf'
71+
[*] HTTP 200 -- Downloaded PDF (34312 bytes)
72+
[+] PDF Author: ekama
73+
[*] 62.50% done (5/8 files)
74+
[*] Downloading 'http://127.0.0.1/doesnotexist.pdf'
75+
[*] HTTP 404 -- Downloaded PDF (289 bytes)
76+
[-] Could not parse PDF: PDF is malformed
77+
[*] 75.00% done (6/8 files)
78+
[*] Downloading 'https://127.0.0.1/test.pdf'
79+
[-] Connection failed: Failed to open TCP connection to 127.0.0.1:443 (Connection refused - connect(2) for "127.0.0.1" port 443)
80+
[*] Downloading 'https://127.0.0.1:80/test.pdf'
81+
[-] Connection failed: SSL_connect returned=1 errno=0 state=unknown state: unknown protocol
82+
83+
[+] Found 4 authors: sqlmap developers, Evil1, Administrator, ekama
84+
[*] Writing data to /root/output...
85+
[*] Auxiliary module execution completed
86+
```
87+

metasploit-framework.gemspec

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,12 @@ Gem::Specification.new do |spec|
111111
spec.add_runtime_dependency 'xmlrpc'
112112
end
113113

114+
#
115+
# File Parsing Libraries
116+
#
117+
# Needed by auxiliary/gather/http_pdf_authors module
118+
spec.add_runtime_dependency 'pdf-reader'
119+
114120
#
115121
# Protocol Libraries
116122
#
Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
##
2+
# This module requires Metasploit: http://metasploit.com/download
3+
# Current source: https://github.com/rapid7/metasploit-framework
4+
##
5+
6+
require 'pdf-reader'
7+
8+
class MetasploitModule < Msf::Auxiliary

  def initialize(info = {})
    super(update_info(info,
      'Name'        => 'Gather PDF Authors',
      'Description' => %q{
        This module downloads PDF files and extracts the author's
        name from the document metadata.
      },
      'License'     => MSF_LICENSE,
      'Author'      => 'Brendan Coles <bcoles[at]gmail.com>'))
    register_options(
      [
        OptString.new('URL', [ false, 'The URL of a PDF to analyse', '' ]),
        OptString.new('URL_LIST', [ false, 'File containing a list of PDF URLs to analyze', '' ]),
        OptString.new('OUTFILE', [ false, 'File to store output', '' ])
      ])
    register_advanced_options(
      [
        # Was OptString: a non-empty string datastore value is always truthy
        # in Ruby, so `set SSL_VERIFY false` could never actually disable
        # certificate verification. OptBool accepts the same true/false
        # values, so this change is backward-compatible for users.
        OptBool.new('SSL_VERIFY', [ true, 'Verify SSL certificate', true ]),
        OptString.new('PROXY', [ false, 'Proxy server to route connection. <host>:<port>', nil ]),
        OptString.new('PROXY_USER', [ false, 'Proxy Server User', nil ]),
        OptString.new('PROXY_PASS', [ false, 'Proxy Server Password', nil ])
      ])
  end

  # Print a percentage progress message for the URL list.
  #
  # @param current [Integer] number of URLs processed so far
  # @param total [Integer] total number of URLs to process
  def progress(current, total)
    done = (current.to_f / total.to_f) * 100
    percent = "%3.2f%%" % done.to_f
    print_status "%7s done (%d/%d files)" % [percent, current, total]
  end

  # Build the list of target URLs from the URL or URL_LIST options.
  # URL takes precedence when set; otherwise URL_LIST must point to a
  # readable file containing one URL per line.
  #
  # @return [Array<String>] URLs to process
  # @raise fails with Failure::BadConfig when neither option is usable
  def load_urls
    return [ datastore['URL'] ] unless datastore['URL'].to_s.eql? ''

    if datastore['URL_LIST'].to_s.eql? ''
      fail_with Failure::BadConfig, 'No URL(s) specified'
    end

    unless File.file? datastore['URL_LIST'].to_s
      # Fixed typo in the error message: "exit" -> "exist".
      fail_with Failure::BadConfig, "File '#{datastore['URL_LIST']}' does not exist"
    end

    File.open(datastore['URL_LIST'], 'rb') {|f| f.read}.split(/\r?\n/)
  end

  # Parse PDF data with PDF::Reader and extract the author name,
  # reporting (but not raising) parse failures.
  #
  # @param data [StringIO] raw PDF contents
  # @return [String, nil] author name, '' when absent, or nil on parse failure
  def read(data)
    begin
      reader = PDF::Reader.new data
      return parse reader
    rescue PDF::Reader::MalformedPDFError
      print_error "Could not parse PDF: PDF is malformed"
      return
    rescue PDF::Reader::UnsupportedFeatureError
      print_error "Could not parse PDF: PDF::Reader::UnsupportedFeatureError"
      return
    rescue => e
      print_error "Could not parse PDF: Unhandled exception: #{e}"
      return
    end
  end

  # Extract the author's name from the PDF document info dictionary.
  # Only the first line of a multi-line author field is returned.
  #
  # @param reader [PDF::Reader] parsed PDF document
  # @return [String] first line of the :Author entry, or '' when not a String
  def parse(reader)
    # PDF
    #print_status "PDF Version: #{reader.pdf_version}"
    #print_status "PDF Title: #{reader.info['title']}"
    #print_status "PDF Info: #{reader.info}"
    #print_status "PDF Metadata: #{reader.metadata}"
    #print_status "PDF Pages: #{reader.page_count}"

    # Software
    #print_status "PDF Creator: #{reader.info[:Creator]}"
    #print_status "PDF Producer: #{reader.info[:Producer]}"

    # Author
    reader.info[:Author].class == String ? reader.info[:Author].split(/\r?\n/).first : ''
  end

  # Download a PDF over HTTP(S), optionally via the configured proxy.
  #
  # @param url [String] http:// or https:// URL of the PDF
  # @return [StringIO, nil] response body, or nil on invalid URL /
  #   connection failure (errors are printed, not raised)
  def download(url)
    print_status "Downloading '#{url}'"

    begin
      target = URI.parse url
      raise 'Invalid URL' unless target.scheme =~ %r{https?}
      raise 'Invalid URL' if target.host.to_s.eql? ''
    rescue => e
      print_error "Could not parse URL: #{e}"
      return
    end

    clnt = Net::HTTP::Proxy(@proxysrv, @proxyport, @proxyuser, @proxypass).new(target.host, target.port)

    if target.scheme.eql? 'https'
      clnt.use_ssl = true
      clnt.verify_mode = datastore['SSL_VERIFY'] ? OpenSSL::SSL::VERIFY_PEER : OpenSSL::SSL::VERIFY_NONE
    end

    headers = {
      'User-Agent' => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/4.0.221.6 Safari/525.13'
    }

    begin
      res = clnt.get2 target.request_uri, headers
    rescue => e
      print_error "Connection failed: #{e}"
      return
    end

    unless res
      print_error 'Connection failed'
      return
    end

    print_status "HTTP #{res.code} -- Downloaded PDF (#{res.body.length} bytes)"

    contents = StringIO.new
    contents.puts res.body
    contents
  end

  # Append extracted data to OUTFILE, when configured.
  #
  # @param data [String] data to write
  def write_output(data)
    return if datastore['OUTFILE'].to_s.eql? ''

    print_status "Writing data to #{datastore['OUTFILE']}..."
    file_name = datastore['OUTFILE']

    if FileTest::exist?(file_name)
      print_status 'OUTFILE already exists, appending..'
    end

    File.open(file_name, 'ab') do |fd|
      fd.write(data)
    end
  end

  # Main entry point: download each URL, extract authors, report and
  # optionally write the results to OUTFILE.
  def run
    if datastore['PROXY']
      @proxysrv, @proxyport = datastore['PROXY'].split(':')
      @proxyuser = datastore['PROXY_USER']
      @proxypass = datastore['PROXY_PASS']
    else
      @proxysrv, @proxyport = nil, nil
    end

    urls = load_urls
    print_status "Processing #{urls.size} URLs..."
    authors = []
    # Cap stored author names to avoid unbounded output from hostile PDFs.
    max_len = 256
    urls.each_with_index do |url, index|
      next if url.blank?
      contents = download url
      next if contents.blank?
      author = read contents
      unless author.blank?
        print_good "PDF Author: #{author}"
        if author.length > max_len
          # Interpolate the actual limit (the original string contained a
          # garbled literal instead of the truncation length).
          print_warning "Warning: Truncated author's name at #{max_len} characters"
          authors << author[0...max_len]
        else
          authors << author
        end
      end
      progress(index + 1, urls.size)
    end

    print_line

    if authors.empty?
      print_status 'Found no authors'
      return
    end

    print_good "Found #{authors.size} authors: #{authors.join ', '}"
    write_output authors.join "\n"
  end
end

0 commit comments

Comments
 (0)