@@ -76,7 +76,7 @@ def cleanup
76
76
super
77
77
# clear my resource, deregister ref, stop/close the HTTP socket
78
78
begin
79
- @http_service . remove_resource ( "/grab" )
79
+ @http_service . remove_resource ( collect_data_uri )
80
80
@http_service . deref
81
81
@http_service . stop
82
82
@http_service . close
@@ -140,7 +140,7 @@ def start_http(opts={})
140
140
'Proc' => Proc . new { |cli , req |
141
141
on_request_uri ( cli , req )
142
142
} ,
143
- 'Path' => "/grab"
143
+ 'Path' => collect_data_uri
144
144
} . update ( opts [ 'Uri' ] || { } )
145
145
146
146
proto = ( datastore [ "SSL" ] ? "https" : "http" )
@@ -715,24 +715,43 @@ def all_script_urls(pages)
715
715
716
716
# @return [Array<Array<String>>] list of URLs for remote javascripts that are cacheable
717
717
def find_cached_scripts
718
- cached_scripts = all_script_urls ( urls ) . map do |urls_for_site |
718
+ cached_scripts = all_script_urls ( urls ) . each_with_index . map do |urls_for_site , i |
719
+ begin
720
+ page_uri = URI . parse ( urls [ i ] )
721
+ rescue URI ::InvalidURIError => e
722
+ next
723
+ end
724
+
719
725
results = urls_for_site . uniq . map do |url |
720
- print_status "URL: #{ url } "
721
- io = open url
722
- # parse some HTTP headers and do type coercions
723
- last_modified = io . last_modified
724
- expires = Time . parse ( io . meta [ 'expires' ] ) rescue nil
725
- cache_control = io . meta [ 'cache-control' ] || ''
726
- charset = io . charset
727
- etag = io . meta [ 'etag' ]
728
- # lets see if we are able to "poison" the cache for this asset...
729
- if ( !expires . nil? && Time . now < expires ) or
730
- ( cache_control . length > 0 ) or # if asset is cacheable
731
- ( last_modified . length > 0 )
732
- print_status ( "Found cacheable #{ url } " )
733
- io . meta . merge ( :body => io . read , :url => url )
734
- else
735
- nil
726
+ begin
727
+ print_status "URL: #{ url } "
728
+ begin
729
+ script_uri = URI . parse ( url )
730
+ if script_uri . relative?
731
+ url = page_uri + url
732
+ end
733
+ io = open ( url )
734
+ rescue URI ::InvalidURIError => e
735
+ next
736
+ end
737
+
738
+ # parse some HTTP headers and do type coercions
739
+ last_modified = io . last_modified
740
+ expires = Time . parse ( io . meta [ 'expires' ] ) rescue nil
741
+ cache_control = io . meta [ 'cache-control' ] || ''
742
+ charset = io . charset
743
+ etag = io . meta [ 'etag' ]
744
+ # lets see if we are able to "poison" the cache for this asset...
745
+ if ( !expires . nil? && Time . now < expires ) or
746
+ ( cache_control . length > 0 ) or # if asset is cacheable
747
+ ( not last_modified . nil? and last_modified . to_s . length > 0 )
748
+ print_status ( "Found cacheable #{ url } " )
749
+ io . meta . merge ( :body => io . read , :url => url )
750
+ else
751
+ nil
752
+ end
753
+ rescue Errno ::ENOENT => e # lots of things can go wrong here.
754
+ next
736
755
end
737
756
end
738
757
results . compact # remove nils
@@ -745,7 +764,9 @@ def find_cached_scripts
745
764
746
765
# Path on the HTTP listener that collected data is sent back to.
#
# Reads the "URI_PATH" datastore option; when it is unset or empty the
# path falls back to '/grab'. A leading slash is added if the configured
# value does not already start with one.
#
# @return [String] the path to send data back to
def collect_data_uri
  # to_s turns a missing (nil) option into "" so one emptiness check covers both cases
  path = datastore["URI_PATH"].to_s
  path = '/grab' if path.empty?
  path.start_with?('/') ? path : "/#{path}"
end
750
771
751
772
# @return [String] formatted http/https URL of the listener
@@ -780,9 +801,9 @@ def urls
780
801
# Replaces the five XML-significant characters (& < > ' ") with their
# predefined XML entities. The input is coerced with to_s first, so nil
# and non-String values are accepted.
#
# @param [String] input the unencoded string
# @return [String] input with dangerous chars replaced with xml entities
def escape_xml(input)
  entities = {
    "&"  => "&amp;",
    "<"  => "&lt;",
    ">"  => "&gt;",
    "'"  => "&apos;",
    "\"" => "&quot;"
  }
  # Single pass: each source character is replaced once, so the '&' that
  # begins an emitted entity is never escaped a second time.
  input.to_s.gsub(/[&<>'"]/, entities)
end
787
808
788
809
def should_steal_cookies?
0 commit comments