diff --git a/app/controllers/impressionist_controller.rb b/app/controllers/impressionist_controller.rb index aa15edb3..65f64344 100644 --- a/app/controllers/impressionist_controller.rb +++ b/app/controllers/impressionist_controller.rb @@ -1,75 +1,61 @@ -require 'digest/sha2' +# frozen_string_literal: true +require 'digest/sha2' module ImpressionistController module ClassMethods - def impressionist(opts={}) - if Rails::VERSION::MAJOR >= 5 - before_action { |c| c.impressionist_subapp_filter(opts) } - else - before_filter { |c| c.impressionist_subapp_filter(opts) } - end + def impressionist(opts = {}) + before_action { |c| c.impressionist_subapp_filter(opts) } end end module InstanceMethods def self.included(base) - if Rails::VERSION::MAJOR >= 5 - base.before_action :impressionist_app_filter - else - base.before_filter :impressionist_app_filter - end + base.before_action :impressionist_app_filter end - def impressionist(obj,message=nil,opts={}) - if should_count_impression?(opts) - if obj.respond_to?("impressionable?") - if unique_instance?(obj, opts[:unique]) - obj.impressions.create(associative_create_statement({:message => message})) - end - else - # we could create an impression anyway. for classes, too. why not? - raise "#{obj.class.to_s} is not impressionable!" + def impressionist(obj, message = nil, opts = {}) + return unless should_count_impression?(opts) + + if obj.respond_to?("impressionable?") + if unique_instance?(obj, opts[:unique]) + obj.impressions.create(associative_create_statement({ message: message })) end + else + raise "#{obj.class} is not impressionable!" end end def impressionist_app_filter - @impressionist_hash = Digest::SHA2.hexdigest(Time.now.to_f.to_s+rand(10000).to_s) + @impressionist_hash = Digest::SHA2.hexdigest("#{Time.now.to_f}#{SecureRandom.hex(16)}") end def impressionist_subapp_filter(opts = {}) - if should_count_impression?(opts) - actions = opts[:actions] - actions.collect!{|a|a.to_s} unless actions.blank? - if (actions.blank? || actions.include?(action_name)) && unique?(opts[:unique]) - Impression.create(direct_create_statement) - end + return unless should_count_impression?(opts) + + actions = opts[:actions] + actions&.collect!(&:to_s) + + if (actions.blank? || actions.include?(action_name)) && unique?(opts[:unique]) + Impression.create(direct_create_statement) end end protected - # creates a statment hash that contains default values for creating an impression via an AR relation. - def associative_create_statement(query_params={}) - # support older versions of rails: - # see https://github.com/rails/rails/pull/34039 - if Rails::VERSION::MAJOR < 6 - filter = ActionDispatch::Http::ParameterFilter.new(Rails.application.config.filter_parameters) - else - filter = ActiveSupport::ParameterFilter.new(Rails.application.config.filter_parameters) - end + def associative_create_statement(query_params = {}) + filter = ActiveSupport::ParameterFilter.new(Rails.application.config.filter_parameters) query_params.reverse_merge!( - :controller_name => controller_name, - :action_name => action_name, - :user_id => user_id, - :request_hash => @impressionist_hash, - :session_hash => session_hash, - :ip_address => request.remote_ip, - :referrer => request.referer, - :params => filter.filter(params_hash) - ) + controller_name: controller_name, + action_name: action_name, + user_id: user_id, + request_hash: @impressionist_hash, + session_hash: session_hash, + ip_address: sanitized_ip_address, + referrer: sanitized_referrer, + params: sanitized_params(filter) + ) end private @@ -91,15 +77,15 @@ def condition_false?(condition) end def conditional?(condition) - condition.is_a?(Symbol) ? self.send(condition) : condition.call + condition.is_a?(Symbol) ? send(condition) : condition.call end def unique_instance?(impressionable, unique_opts) - return unique_opts.blank? || !impressionable.impressions.where(unique_query(unique_opts, impressionable)).exists? + unique_opts.blank? || !impressionable.impressions.where(unique_query(unique_opts, impressionable)).exists? end def unique?(unique_opts) - return unique_opts.blank? || check_impression?(unique_opts) + unique_opts.blank? || check_impression?(unique_opts) end def check_impression?(unique_opts) @@ -118,37 +104,90 @@ def unique_opts_has_params?(unique_opts) def check_unique_with_params?(impressions) request_param = params_hash - impressions.detect{|impression| impression.params == request_param }.nil? + impressions.detect { |impression| impression.params == request_param }.nil? end - # creates the query to check for uniqueness - def unique_query(unique_opts,impressionable=nil) - full_statement = direct_create_statement({},impressionable) - # reduce the full statement to the params we need for the specified unique options + def unique_query(unique_opts, impressionable = nil) + full_statement = direct_create_statement({}, impressionable) unique_opts.reduce({}) do |query, param| query[param] = full_statement[param] query end end - # creates a statment hash that contains default values for creating an impression. - def direct_create_statement(query_params={},impressionable=nil) + def direct_create_statement(query_params = {}, impressionable = nil) query_params.reverse_merge!( - :impressionable_type => controller_name.singularize.camelize, - :impressionable_id => impressionable.present? ? impressionable.id : params[:id] - ) + impressionable_type: sanitized_impressionable_type, + impressionable_id: impressionable.present? ? impressionable.id : sanitized_impressionable_id + ) associative_create_statement(query_params) end + def sanitized_ip_address + return nil unless Impressionist.log_ip_address + + ip = request.remote_ip.to_s + return nil if ip.blank? + + if ip.match?(/\A(?:\d{1,3}\.){3}\d{1,3}\z/) || ip.match?(/\A[a-fA-F0-9:]+\z/) + ip.slice(0, 45) + end + end + + def sanitized_referrer + return nil unless Impressionist.log_referrer + + referrer = request.referer.to_s + return nil if referrer.blank? + + begin + uri = URI.parse(referrer.slice(0, 2048)) + uri.to_s if uri.scheme&.match?(/\Ahttps?\z/) + rescue URI::InvalidURIError + nil + end + end + + def sanitized_params(filter) + return {} unless Impressionist.log_params + + filtered = filter.filter(params_hash) + json = filtered.to_json + return {} if json.bytesize > Impressionist.max_params_size + + filtered + end + + def sanitized_impressionable_type + type = controller_name.singularize.camelize + return nil unless type.match?(/\A[A-Za-z][A-Za-z0-9_:]*\z/) + + type + end + + def sanitized_impressionable_id + id = params[:id] + return nil if id.blank? + + if id.to_s.match?(/\A\d+\z/) + id.to_i + elsif id.to_s.match?(/\A[a-f0-9\-]{36}\z/i) + id.to_s + end + end + def session_hash + return nil unless Impressionist.log_session_hash + id = session.id || request.session_options[:id] + return nil if id.nil? if id.respond_to?(:cookie_value) - id.cookie_value + id.cookie_value.to_s.slice(0, 255) elsif id.is_a?(Rack::Session::SessionId) - id.public_id + id.public_id.to_s.slice(0, 255) else - id.to_s + id.to_s.slice(0, 255) end end @@ -156,11 +195,10 @@ def params_hash request.params.except(:controller, :action, :id) end - #use both @current_user and current_user helper def user_id user_id = @current_user&.id rescue nil user_id = current_user&.id rescue nil if user_id.blank? user_id end end -end +end \ No newline at end of file diff --git a/app/models/impressionist/bots.rb b/app/models/impressionist/bots.rb index aee7cf9c..b6f29d34 100644 --- a/app/models/impressionist/bots.rb +++ b/app/models/impressionist/bots.rb @@ -1,1468 +1,43 @@ +# frozen_string_literal: true + +require 'set' + module Impressionist module Bots def self.bot?(user_agent = nil) return false if user_agent.nil? - WILD_CARDS.any? { |wc| user_agent.downcase.include?(wc) } || LIST.include?(user_agent) + + ua_downcase = user_agent.downcase + + # Real browsers are not bots - check this first + return false if real_browser?(ua_downcase) + + WILD_CARDS.any? { |wc| ua_downcase.include?(wc) } || LIST.include?(user_agent) + end + + def self.real_browser?(ua) + return false if ua.nil? + + # Must have Mozilla/5.0 AND a browser engine + return false unless ua.include?('mozilla/5.0') + + browser_engines = ['applewebkit/', 'gecko/', 'presto/', 'trident/'] + has_engine = browser_engines.any? { |engine| ua.include?(engine) } + + browser_identifiers = ['chrome/', 'safari/', 'firefox/', 'edg/', 'opr/', 'msie ', 'rv:11'] + has_browser = browser_identifiers.any? { |browser| ua.include?(browser) } + + has_engine && has_browser end - WILD_CARDS = ["bot","yahoo","slurp","google","msn","crawler"] + # Removed 'google', 'yahoo', 'msn' - they match real browsers + WILD_CARDS = %w[bot crawler spider slurp scraper fetch nutch wget curl archiver transcoder].freeze - LIST = [" UnChaos From Chaos To Order Hybrid Web Search Engine.(vadim_gonchar@unchaos.com)", + LIST = Set.new([ + " UnChaos From Chaos To Order Hybrid Web Search Engine.(vadim_gonchar@unchaos.com)", " UnChaos Bot Hybrid Web Search Engine. (vadim_gonchar@unchaos.com)", - " UnChaosBot From Chaos To Order UnChaos Hybrid Web Search Engine at www.unchaos.com (info@unchaos.com)", - " http://www.sygol.com", - "*/Nutch-0.9-dev", - "+SitiDi.net/SitiDiBot/1.0 (+Have Good Day)", - "-DIE-KRAEHE- META-SEARCH-ENGINE/1.1 http://www.die-kraehe.de", - "192.comAgent", - "4anything.com LinkChecker v2.0", - "8484 Boston Project v 1.0", - ":robot/1.0 (linux) ( admin e-mail: undefined http://www.neofonie.de/loesungen/search/robot.html )", - "A-Online Search", - "A1 Sitemap Generator/1.0 (+http://www.micro-sys.dk/products/sitemap-generator/) miggibot/2006.01.24", - "aardvark-crawler", - "AbachoBOT", - "AbachoBOT (Mozilla compatible)", - "ABCdatos BotLink/5.xx.xxx#BBL", - "Aberja Checkomat", - "abot/0.1 (abot; http://www.abot.com; abot@abot.com)", - "About/0.1libwww-perl/5.47", - "Accelatech RSSCrawler/0.4", - "accoona", - "Accoona-AI-Agent/1.1.1 (crawler at accoona dot com)", - "Accoona-AI-Agent/1.1.2 (aicrawler at accoonabot dot com)", - "Ack (http://www.ackerm.com/)", - "AcoiRobot", - "Acoon Robot v1.50.001", - "Acoon Robot v1.52 (http://www.acoon.de)", - "Acoon-Robot 4.0.x.[xx] (http://www.acoon.de)", - "Acoon-Robot v3.xx (http://www.acoon.de and http://www.acoon.com)", - "Acorn/Nutch-0.9 (Non-Profit Search Engine; acorn.isara.org; acorn at isara dot org)", - "AESOP_com_SpiderMan", - "agadine/1.x.x (+http://www.agada.de)", - "Agent-SharewarePlazaFileCheckBot/2.0+(+http://www.SharewarePlaza.com)", - "AgentName/0.1 libwww-perl/5.48", - "AIBOT/2.1 By +(www.21seek.com A Real artificial intelligence search engine China)", - "aipbot/1.0 (aipbot; http://www.aipbot.com; aipbot@aipbot.com)", - "aipbot/2-beta (aipbot dev; http://aipbot.com; aipbot@aipbot.com)", - "Aladin/3.324", - "Aleksika Spider/1.0 (+http://www.aleksika.com/)", - "AlkalineBOT/1.3", - "AlkalineBOT/1.4 (1.4.0326.0 RTM)", - "Allesklar/0.1 libwww-perl/5.46", - "Allrati/1.1 (+)", - "AltaVista Intranet V2.0 AVS EVAL search@freeit.com", - "AltaVista Intranet V2.0 Compaq Altavista Eval sveand@altavista.net", - "AltaVista Intranet V2.0 evreka.com crawler@evreka.com", - "AltaVista V2.0B crawler@evreka.com", - "AmfibiBOT", - "Amfibibot/0.06 (Amfibi Web Search; http://www.amfibi.com; agent@amfibi.com)", - "Amfibibot/0.07 (Amfibi Robot; http://www.amfibi.com; agent@amfibi.com)", - "amibot", - "AnnoMille spider 0.1 alpha - http://www.annomille.it", - "AnswerBus (http://www.answerbus.com/)", - "antibot-V1.1.5/i586-linux-2.2", - "AnzwersCrawl/2.0 (anzwerscrawl@anzwers.com.au;Engine)", - "Apexoo Spider 1.x", - "Aport", - "appie 1.1 (www.walhello.com)", - "ArabyBot (compatible; Mozilla/5.0; GoogleBot; FAST Crawler 6.4; http://www.araby.com;)", - "ArachBot", - "Arachnoidea (arachnoidea@euroseek.com)", - "ArchitextSpider", - "archive.org_bot", - "Arikus_Spider", - "Arquivo-web-crawler (compatible; heritrix/1.12.1 +http://arquivo-web.fccn.pt)", - "ASAHA Search Engine Turkey V.001 (http://www.asaha.com/)", - "Asahina-Antenna/1.x", - "Asahina-Antenna/1.x (libhina.pl/x.x ; libtime.pl/x.x)", - "ask.24x.info", - "AskAboutOil/0.06-rcp (Nutch; http://www.nutch.org/docs/en/bot.html; nutch-agent@askaboutoil.com)", - "asked/Nutch-0.8 (web crawler; http://asked.jp; epicurus at gmail dot com)", - "ASPSeek/1.2.5", - "ASPseek/1.2.9d", - "ASPSeek/1.2.x", - "ASPSeek/1.2.xa", - "ASPseek/1.2.xx", - "ASPSeek/1.2.xxpre", - "ASSORT/0.10", - "asterias/2.0", - "AtlocalBot/1.1 +(http://www.atlocal.com/local-web-site-owner.html)", - "Atomic_Email_Hunter/4.0", - "Atomz/1.0", - "atSpider/1.0", - "Attentio/Nutch-0.9-dev (Attentio's beta blog crawler; www.attentio.com; info@attentio.com)", - "augurfind", - "augurnfind V-1.x", - "autoemailspider", - "autowebdir 1.1 (www.autowebdir.com)", - "AV Fetch 1.0", - "AVSearch-1.0(peter.turney@nrc.ca)", - "AVSearch-3.0(AltaVista/AVC)", - "axadine/ (Axadine Crawler; http://www.axada.de/; )", - "AxmoRobot - Crawling your site for better indexing on www.axmo.com search engine.", - "BabalooSpider/1.3 (BabalooSpider; http://www.babaloo.si; spider@babaloo.si)", - "BaboomBot/1.x.x (+http://www.baboom.us)", - "BaiduImagespider+(+http://www.baidu.jp/search/s308.html)", - "BaiDuSpider", - "Baiduspider+(+http://help.baidu.jp/system/05.html)", - "Baiduspider+(+http://www.baidu.com/search/spider.htm)", - "Baiduspider+(+http://www.baidu.com/search/spider_jp.html)", - "Balihoo/Nutch-1.0-dev (Crawler for Balihoo.com search engine - obeys robots.txt and robots meta tags ; http://balihoo.com/index.aspx; robot at balihoo dot com)", - "BarraHomeCrawler (albertof@barrahome.org)", - "bdcindexer_2.6.2 (research@bdc)", - "BDFetch", - "BDNcentral Crawler v2.3 [en] (http://www.bdncentral.com/robot.html) (X11; I; Linux 2.0.44 i686)", - "beautybot/1.0 (+http://www.uchoose.de/crawler/beautybot/)", - "BebopBot/2.5.1 ( crawler http://www.apassion4jazz.net/bebopbot.html )", - "BigCliqueBOT/1.03-dev (bigclicbot; http://www.bigclique.com; bot@bigclique.com)", - "BIGLOTRON (Beta 2;GNU/Linux)", - "Bigsearch.ca/Nutch-x.x-dev (Bigsearch.ca Internet Spider; http://www.bigsearch.ca/; info@enhancededge.com)", - "BilgiBetaBot/0.8-dev (bilgi.com (Beta) ; http://lucene.apache.org/nutch/bot.html; nutch-agent@lucene.apache.org)", - "BilgiBot/1.0(beta) (http://www.bilgi.com/; bilgi at bilgi dot com)", - "Bitacle bot/1.1", - "Bitacle Robot (V:1.0;) (http://www.bitacle.com)", - "BlackWidow", - "Blaiz-Bee/1.0 (+http://www.blaiz.net)", - "Blaiz-Bee/2.00.8222 (BE Internet Search Engine http://www.rawgrunt.com)", - "Blaiz-Bee/2.00.xxxx (+http://www.blaiz.net)", - "BlitzBOT@tricus.net", - "BlitzBOT@tricus.net (Mozilla compatible)", - "BlogBot/1.x", - "Bloglines Title Fetch/1.0 (http://www.bloglines.com)", - "Bloglines-Images/0.1 (http://www.bloglines.com)", - "Bloglines/3.1 (http://www.bloglines.com)", - "Blogpulse (info@blogpulse.com)", - "BlogPulseLive (support@blogpulse.com)", - "BlogSearch/1.x +http://www.icerocket.com/", - "blogsearchbot-pumpkin-3", - "BlogsNowBot, V 2.01 (+http://www.blogsnow.com/)", - "BlogVibeBot-v1.1 (spider@blogvibe.nl)", - "blogWatcher_Spider/0.1 (http://www.lr.pi.titech.ac.jp/blogWatcher/)", - "BlogzIce/1.0 (+http://icerocket.com; rhodes@icerocket.com)", - "BlogzIce/1.0 +http://www.icerocket.com/", - "BloobyBot", - "Bloodhound/Nutch-0.9 (Testing Crawler for Research - obeys robots.txt and robots meta tags ; http://balihoo.com/index.aspx; robot at balihoo dot com)", - "boitho.com-dc/0.xx (http://www.boitho.com/dcbot.html)", - "boitho.com-robot/1.x", - "boitho.com-robot/1.x (http://www.boitho.com/bot.html)", - "BPImageWalker/2.0 (www.bdbrandprotect.com)", - "BravoBrian SpiderEngine MarcoPolo", - "BruinBot (+http://webarchive.cs.ucla.edu/bruinbot.html) ", - "BSDSeek/1.0", - "BTbot/0.x (+http://www.btbot.com/btbot.html)", - "BuildCMS crawler (http://www.buildcms.com/crawler)", - "BullsEye", - "bumblebee@relevare.com", - "BurstFindCrawler/1.1 (crawler.burstfind.com; http://crawler.burstfind.com; crawler@burstfind.com)", - "Buscaplus Robi/1.0 (http://www.buscaplus.com/robi/)", - "bwh3_user_agent", - "Cabot/Nutch-0.9 (Amfibi's web-crawling robot; http://www.amfibi.com/cabot/; agent@amfibi.com)", - "Cabot/Nutch-1.0-dev (Amfibi's web-crawling robot; http://www.amfibi.com/cabot/; agent@amfibi.com)", - "carleson/1.0", - "Carnegie_Mellon_University_Research_WebBOT-->PLEASE READ-->http://www.andrew.cmu.edu/~brgordon/webbot/index.html http://www.andrew.cmu.edu/~brgordon/webbot/index.html", - "Carnegie_Mellon_University_WebCrawler http://www.andrew.cmu.edu/~brgordon/webbot/index.html", - "Catall Spider", - "CazoodleBot/CazoodleBot-0.1 (CazoodleBot Crawler; http://www.cazoodle.com/cazoodlebot; cazoodlebot@cazoodle.com)", - "CCBot/1.0 (+http://www.commoncrawl.org/bot.html)", - "ccubee/x.x", - "Ceramic Tile Installation Guide (http://www.floorstransformed.com)", - "cfetch/1.0", - "China Local Browse 2.6", - "ChristCRAWLER 2.0", - "CipinetBot (http://www.cipinet.com/bot.html)", - "ClariaBot/1.0", - "Claymont.com", - "CloakDetect/0.9 (+http://fulltext.seznam.cz/)", - "Clushbot/2.x (+http://www.clush.com/bot.html)", - "Clushbot/3.x-BinaryFury (+http://www.clush.com/bot.html)", - "Clushbot/3.xx-Ajax (+http://www.clush.com/bot.html)", - "Clushbot/3.xx-Hector (+http://www.clush.com/bot.html)", - "Clushbot/3.xx-Peleus (+http://www.clush.com/bot.html)", - "Cogentbot/1.X (+http://www.cogentsoftwaresolutions.com/bot.html)", - "combine/0.0", - "Combine/2.0 http://combine.it.lth.se/", - "Combine/3 http://combine.it.lth.se/", - "Combine/x.0", - "cometrics-bot, http://www.cometrics.de", - "Computer_and_Automation_Research_Institute_Crawler crawler@ilab.sztaki.hu", - "Comrite/0.7.1 (Nutch; http://lucene.apache.org/nutch/bot.html; nutch-agent@lucene.apache.org)", - "ContactBot/0.2", - "ContentSmartz", - "Convera Internet Spider V6.x", - "ConveraCrawler/0.2", - "ConveraCrawler/0.9d (+http://www.authoritativeweb.com/crawl)", - "ConveraMultiMediaCrawler/0.1 (+http://www.authoritativeweb.com/crawl)", - "CoolBot", - "cosmos/0.8_(robot@xyleme.com)", - "cosmos/0.9_(robot@xyleme.com)", - "CougarSearch/0.x (+http://www.cougarsearch.com/faq.shtml)", - "Covac TexAs Arachbot", - "Cowbot-0.1 (NHN Corp. / +82-2-3011-1954 / nhnbot@naver.com)", - "Cowbot-0.1.x (NHN Corp. / +82-2-3011-1954 / nhnbot@naver.com)", - "CrawlConvera0.1 (CrawlConvera@yahoo.com)", - "Crawler (cometsearch@cometsystems.com)", - "Crawler admin@crawler.de", - "Crawler V 0.2.x admin@crawler.de", - "crawler@alexa.com", - "CrawlerBoy Pinpoint.com", - "Crawllybot/0.1 (Crawllybot; +http://www.crawlly.com; crawler@crawlly.com)", - "CreativeCommons/0.06-dev (Nutch; http://www.nutch.org/docs/en/bot.html; nutch-agent@lists.sourceforge.net)", - "CrocCrawler vx.3 [en] (http://www.croccrawler.com) (X11; I; Linux 2.0.44 i686)", - "csci_b659/0.13", - "Cuasarbot/0.9b http://www.cuasar.com/spider_beta/ ", - "CurryGuide SiteScan 1.1", - "Custom Spider www.bisnisseek.com /1.0", - "CyberPatrol SiteCat Webbot (http://www.cyberpatrol.com/cyberpatrolcrawler.asp)", - "CydralSpider/1.x (Cydral Web Image Search; http://www.cydral.com)", - "CydralSpider/3.0 (Cydral Image Search; http://www.cydral.com)", - "DataCha0s/2.0", - "DataCha0s/2.0", - "DataFountains/DMOZ Downloader", - "DataFountains/Dmoz Downloader (http://ivia.ucr.edu/useragents.shtml)", - "DataFountains/DMOZ Feature Vector Corpus Creator (http://ivia.ucr.edu/useragents.shtml)", - "DataparkSearch/4.47 (+http://dataparksearch.org/bot)", - "DataparkSearch/4.xx (http://www.dataparksearch.org/)", - "DataSpear/1.0 (Spider; http://www.dataspear.com/spider.html; spider@dataspear.com)", - "DataSpearSpiderBot/0.2 (DataSpear Spider Bot; http://dssb.dataspear.com/bot.html; dssb@dataspear.com)", - "DatenBot( http://www.sicher-durchs-netz.de/bot.html)", - "DaviesBot/1.7 (www.wholeweb.net)", - "daypopbot/0.x", - "dbDig(http://www.prairielandconsulting.com)", - "DBrowse 1.4b", - "DBrowse 1.4d", - "dCSbot/1.1", - "de.searchengine.comBot 1.2 (http://de.searchengine.com/spider)", - "deepak-USC/ISI", - "DeepIndex", - "DeepIndex ( http://www.zetbot.com )", - "DeepIndex (www.en.deepindex.com)", - "DeepIndexer.ca", - "Demo Bot DOT 16b", - "Demo Bot Z 16b", - "Denmex websearch (http://search.denmex.com)", - "dev-spider2.searchpsider.com/1.3b", - "DiaGem/1.1 (http://www.skyrocket.gr.jp/diagem.html)", - "Diamond/x.0", - "DiamondBot", - "Digger/1.0 JDK/1.3.0rc3", - "DigOut4U", - "DIIbot/1.2", - "disco/Nutch-0.9 (experimental crawler; www.discoveryengine.com; disco-crawl@discoveryengine.com)", - "disco/Nutch-1.0-dev (experimental crawler; www.discoveryengine.com; disco-crawl@discoveryengine.com)", - "DittoSpyder", - "dloader(NaverRobot)/1.0", - "DoCoMo/1.0/Nxxxi/c10", - "DoCoMo/1.0/Nxxxi/c10/TB", - "DoCoMo/2.0 P900iV(c100;TB;W24H11) ", - "DoCoMo/2.0 SH902i (compatible; Y!J-SRD/1.0; http://help.yahoo.co.jp/help/jp/search/indexing/indexing-27.html)", - "DoCoMo/2.0/SO502i (compatible; Y!J-SRD/1.0; http://help.yahoo.co.jp/help/jp/search/indexing/indexing-27.html)", - "dodgebot/experimental", - "Download-Tipp Linkcheck (http://download-tipp.de/)", - "Drecombot/1.0 (http://career.drecom.jp/bot.html)", - "DSurf15a 01", - "DSurf15a 71", - "DSurf15a 81", - "DSurf15a VA", - "dtSearchSpider", - "DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)", - "Dumbot(version 0.1 beta - dumbfind.com)", - "Dumbot(version 0.1 beta - http://www.dumbfind.com/dumbot.html)", - "Dumbot(version 0.1 beta)", - "e-sense 1.0 ea(www.vigiltech.com/esensedisclaim.html)", - "e-SocietyRobot(http://www.yama.info.waseda.ac.jp/~yamana/es/)", - "eApolloBot/2.0 (compatible; heritrix/2.0.0-SNAPSHOT-20071024.170148 +http://www.eapollo-opto.com)", - "EARTHCOM.info/1.x [www.earthcom.info]", - "EARTHCOM.info/1.xbeta [www.earthcom.info]", - "EasyDL/3.xx", - "EasyDL/3.xx http://keywen.com/Encyclopedia/Bot", - "EBrowse 1.4b", - "EchO!/2.0", - "Educate Search VxB", - "egothor/3.0a (+http://www.xdefine.org/robot.html)", - "EgotoBot/4.8 (+http://www.egoto.com/about.htm)", - "ejupiter.com", - "elfbot/1.0 (+http://www.uchoose.de/crawler/elfbot/)", - "ELI/20070402:2.0 (DAUM RSS Robot, Daum Communications Corp.; +http://ws.daum.net/aboutkr.html)", - "EmailSiphon", - "EmailSpider", - "EmailWolf 1.00", - "EMPAS_ROBOT", - "EnaBot/1.x (http://www.enaball.com/crawler.html)", - "Enfish Tracker", - "Enterprise_Search/1.0", - "Enterprise_Search/1.0.xxx", - "Enterprise_Search/1.00.xxx;MSSQL (http://www.innerprise.net/es-spider.asp)", - "envolk/1.7 (+http://www.envolk.com/envolkspiderinfo.php)", - "envolk[ITS]spider/1.6(+http://www.envolk.com/envolkspider.html)", - "EroCrawler", - "ES.NET_Crawler/2.0 (http://search.innerprise.net/)", - "eseek-larbin_2.6.2 (crawler@exactseek.com)", - "ESISmartSpider", - "eStyleSearch 4 (compatible; MSIE 6.0; Windows NT 5.0)", - "ESurf15a 15", - "EuripBot/0.x (+http://www.eurip.com) GetFile", - "EuripBot/0.x (+http://www.eurip.com) GetRobots", - "EuripBot/0.x (+http://www.eurip.com) PreCheck", - "Eurobot/1.0 (http://www.ayell.eu)", - "EvaalSE - bot@evaal.com", - "eventax/1.3 (eventax; http://www.eventax.de/; info@eventax.de)", - "Everest-Vulcan Inc./0.1 (R&D project; host=e-1-24; http://everest.vulcan.com/crawlerhelp)", - "Everest-Vulcan Inc./0.1 (R&D project; http://everest.vulcan.com/crawlerhelp)", - "Exabot-Images/1.0", - "Exabot-Test/1.0", - "Exabot/2.0", - "Exabot/3.0", - "ExactSeek Crawler/0.1", - "exactseek-crawler-2.63 (crawler@exactseek.com)", - "exactseek-pagereaper-2.63 (crawler@exactseek.com)", - "exactseek.com", - "Exalead NG/MimeLive Client (convert/http/0.120)", - "Excalibur Internet Spider V6.5.4", - "Execrawl/1.0 (Execrawl; http://www.execrawl.com/; bot@execrawl.com)", - "exooba crawler/exooba crawler (crawler for exooba.com; http://www.exooba.com/; info at exooba dot com)", - "exooba/exooba crawler (exooba; exooba)", - "ExperimentalHenrytheMiragoRobot", - "ExtractorPro", - "EyeCatcher (Download-tipp.de)/1.0", - "Factbot 1.09 (see http://www.factbites.com/webmasters.php)", - "factbot : http://www.factbites.com/robots", - "Fast Crawler Gold Edition", - "FAST Enterprise Crawler 6 (Experimental)", - "FAST Enterprise Crawler 6 / Scirus scirus-crawler@fast.no; http://www.scirus.com/srsapp/contactus/", - "FAST Enterprise Crawler 6 used by Cobra Development (admin@fastsearch.com)", - "FAST Enterprise Crawler 6 used by Comperio AS (sts@comperio.no)", - "FAST Enterprise Crawler 6 used by FAST (FAST)", - "FAST Enterprise Crawler 6 used by Pages Jaunes (pvincent@pagesjaunes.fr)", - "FAST Enterprise Crawler 6 used by Sensis.com.au Web Crawler (search_comments\\at\\sensis\\dot\\com\\dot\\au)", - "FAST Enterprise Crawler 6 used by Singapore Press Holdings (crawler@sphsearch.sg)", - "FAST Enterprise Crawler/6 (www.fastsearch.com)", - "FAST Enterprise Crawler/6.4 (helpdesk at fast.no)", - "FAST FirstPage retriever (compatible; MSIE 5.5; Mozilla/4.0)", - "FAST MetaWeb Crawler (helpdesk at fastsearch dot com)", - "Fast PartnerSite Crawler", - "FAST-WebCrawler/2.2.10 (Multimedia Search) (crawler@fast.no; http://www.fast.no/faq/faqfastwebsearch/faqfastwebcrawler.html)", - "FAST-WebCrawler/2.2.6 (crawler@fast.no; http://www.fast.no/faq/faqfastwebsearch/faqfastwebcrawler.html)", - "FAST-WebCrawler/2.2.7 (crawler@fast.no; http://www.fast.no/faq/faqfastwebsearch/faqfastwebcrawler.html)http://www.fast.no", - "FAST-WebCrawler/2.2.8 (crawler@fast.no; http://www.fast.no/faq/faqfastwebsearch/faqfastwebcrawler.html)http://www.fast.no", - "FAST-WebCrawler/3.2 test", - "FAST-WebCrawler/3.3 (crawler@fast.no; http://fast.no/support.php?c=faqs/crawler)", - "FAST-WebCrawler/3.4/Nirvana (crawler@fast.no; http://fast.no/support.php?c=faqs/crawler)", - "FAST-WebCrawler/3.4/PartnerSite (crawler@fast.no; http://fast.no/support.php?c=faqs/crawler)", - "FAST-WebCrawler/3.5 (atw-crawler at fast dot no; http://fast.no/support.php?c=faqs/crawler)", - "FAST-WebCrawler/3.6 (atw-crawler at fast dot no; http://fast.no/support/crawler.asp)", - "FAST-WebCrawler/3.6/FirstPage (crawler@fast.no; http://fast.no/support.php?c=faqs/crawler)", - "FAST-WebCrawler/3.7 (atw-crawler at fast dot no; http://fast.no/support/crawler.asp)", - "FAST-WebCrawler/3.7/FirstPage (atw-crawler at fast dot no;http://fast.no/support/crawler.asp)", - "FAST-WebCrawler/3.8 (atw-crawler at fast dot no; http://fast.no/support/crawler.asp)", - "FAST-WebCrawler/3.8/Fresh (atw-crawler at fast dot no; http://fast.no/support/crawler.asp)", - "FAST-WebCrawler/3.x Multimedia", - "FAST-WebCrawler/3.x Multimedia (mm dash crawler at fast dot no)", - "fastbot crawler beta 2.0 (+http://www.fastbot.de)", - "FastBug http://www.ay-up.com", - "FastCrawler 3.0.1 (crawler@1klik.dk)", - "FastSearch Web Crawler for Verizon SuperPages (kevin.watters@fastsearch.com)", - "Favcollector/2.0 (info@favcollector.com http://www.favcollector.com/)", - "favo.eu crawler/0.6 (http://www.favo.eu)", - "Faxobot/1.0", - "Feed Seeker Bot (RSS Feed Seeker http://www.MyNewFavoriteThing.com/fsb.php)", - "Feed24.com", - "FeedChecker/0.01", - "Feedfetcher-Google; (+http://www.google.com/feedfetcher.html)", - "FeedHub FeedDiscovery/1.0 (http://www.feedhub.com)", - "FeedHub MetaDataFetcher/1.0 (http://www.feedhub.com)", - "Feedjit Favicon Crawler 1.0", - "Feedster Crawler/3.0; Feedster, Inc.", - "Felix - Mixcat Crawler (+http://mixcat.com)", - "FFC Trap Door Spider", - "Filtrbox/1.0", - "Findexa Crawler (http://www.findexa.no/gulesider/article26548.ece)", - "findlinks/x.xxx (+http://wortschatz.uni-leipzig.de/findlinks/) ", - "FineBot", - "Firefly/1.0", - "Firefly/1.0 (compatible; Mozilla 4.0; MSIE 5.5)", - "Firefox (kastaneta03@hotmail.com)", - "Firefox_1.0.6 (kasparek@naparek.cz)", - "FirstGov.gov Search - POC:firstgov.webmasters@gsa.gov", - "firstsbot", - "Flapbot/0.7.2 (Flaptor Crawler; http://www.flaptor.com; crawler at flaptor period com)", - "Flexum spider", - "Flexum/2.0", - "FlickBot 2.0 RPT-HTTPClient/0.3-3", - "flunky", - "FnooleBot/2.5.2 (+http://www.fnoole.com/addurl.html)", - "FocusedSampler/1.0", - "Folkd.com Spider/0.1 beta 1 (www.folkd.com)", - "Fooky.com/ScorpionBot/ScoutOut; http://www.fooky.com/scorpionbots", - "Francis/1.0 (francis@neomo.de http://www.neomo.de/)", - "Franklin Locator 1.8", - "FreeFind.com-SiteSearchEngine/1.0 (http://freefind.com; spiderinfo@freefind.com)", - "FreshNotes crawler< report problems to crawler-at-freshnotes-dot-com", - "FSurf15a 01", - "FTB-Bot http://www.findthebest.co.uk/", - "Full Web Bot 0416B", - "Full Web Bot 0516B", - "Full Web Bot 2816B", - "FuseBulb.Com", - "FyberSpider (+http://www.fybersearch.com/fyberspider.php)", - "GAIS Robot/1.0B2", - "Gaisbot/3.0 (indexer@gais.cs.ccu.edu.tw; http://gais.cs.ccu.edu.tw/robot.php)", - "Gaisbot/3.0+(robot06@gais.cs.ccu.edu.tw;+http://gais.cs.ccu.edu.tw/robot.php)", - "GalaxyBot/1.0 (http://www.galaxy.com/galaxybot.html)", - "Gallent Search Spider v1.4 Robot 2 (http://robot.GallentSearch.com)", - "gamekitbot/1.0 (+http://www.uchoose.de/crawler/gamekitbot/)", - "GammaSpider/1.0", - "gazz/x.x (gazz@nttrd.com)", - "generic_crawler/01.0217/", - "genieBot (http://64.5.245.11/faq/faq.html)", - "geniebot wgao@genieknows.com", - "GeonaBot 1.x; http://www.geona.com/", - "gigabaz/3.1x (baz@gigabaz.com; http://gigabaz.com/gigabaz/)", - "Gigabot/2.0 (gigablast.com)", - "Gigabot/2.0/gigablast.com/spider.html", - "Gigabot/2.0; http://www.gigablast.com/spider.html", - "Gigabot/2.0att", - "Gigabot/3.0 (http://www.gigablast.com/spider.html)", - "Gigabot/x.0", - "GigabotSiteSearch/2.0 (sitesearch.gigablast.com)", - "GNODSPIDER (www.gnod.net)", - "Goblin/0.9 (http://www.goguides.org/)", - "Goblin/0.9.x (http://www.goguides.org/goblin-info.html)", - "GoForIt.com", - "GOFORITBOT ( http://www.goforit.com/about/ )", - "gonzo1[P] +http://www.suchen.de/popups/faq.jsp", - "gonzo2[P] +http://www.suchen.de/faq.html", - "Goofer/0.2", - "Googlebot-Image/1.0", - "Googlebot-Image/1.0 ( http://www.googlebot.com/bot.html)", - "Googlebot/2.1 ( http://www.google.com/bot.html)", - "Googlebot/2.1 ( http://www.googlebot.com/bot.html)", - "Googlebot/Test ( http://www.googlebot.com/bot.html)", - "GrapeFX/0.3 libwww/5.4.0", - "great-plains-web-spider/flatlandbot (Flatland Industries Web Spider; http://www.flatlandindustries.com/flatlandbot.php; jason@flatlandindustries.com)", - "GrigorBot 0.8 (http://www.grigor.biz/bot.html)", - "Gromit/1.0", - "grub crawler(http://www.grub.org)", - "grub-client", - "gsa-crawler (Enterprise; GID-01422; jplastiras@google.com)", - "gsa-crawler (Enterprise; GID-01742;gsatesting@rediffmail.com)", - "gsa-crawler (Enterprise; GIX-02057; dm@enhesa.com)", - "gsa-crawler (Enterprise; GIX-03519; cknuetter@stubhub.com)", - "gsa-crawler (Enterprise; GIX-0xxxx; enterprise-training@google.com)", - "Guestbook Auto Submitter", - "Gulliver/1.3", - "Gulper Web Bot 0.2.4 (www.ecsl.cs.sunysb.edu/~maxim/cgi-bin/Link/GulperBot)", - "Gungho/0.08004 (http://code.google.com/p/gungho-crawler/wiki/Index)", - "GurujiBot/1.0 (+http://www.guruji.com/WebmasterFAQ.html)", - "GurujiImageBot/1.0 (+http://www.guruji.com/en/WebmasterFAQ.html)", - "HappyFunBot/1.1", - "Harvest-NG/1.0.2", - "Hatena Antenna/0.4 (http://a.hatena.ne.jp/help#robot)", - "Hatena Pagetitle Agent/1.0", - "Hatena RSS/0.3 (http://r.hatena.ne.jp)", - "hbtronix.spider.2 -- http://hbtronix.de/spider.php", - "HeinrichderMiragoRobot", - "HeinrichderMiragoRobot (http://www.miragorobot.com/scripts/deinfo.asp)", - "Helix/1.x ( http://www.sitesearch.ca/helix/)", - "HenriLeRobotMirago (http://www.miragorobot.com/scripts/frinfo.asp)", - "HenrytheMiragoRobot", - "HenryTheMiragoRobot (http://www.miragorobot.com/scripts/mrinfo.asp)", - "Hi! I'm CsCrawler my homepage: http://www.kde.cs.uni-kassel.de/lehre/ss2005/googlespam/crawler.html RPT-HTTPClient/0.3-3", - "Hippias/0.9 Beta", - "HitList", - "Hitwise Spider v1.0 http://www.hitwise.com", - "holmes/3.11 (http://morfeo.centrum.cz/bot)", - "holmes/3.9 (onet.pl)", - "holmes/3.xx (OnetSzukaj/5.0; +http://szukaj.onet.pl)", - "holmes/x.x", - "HolmesBot (http://holmes.ge)", - "HomePageSearch(hpsearch.uni-trier.de)", - "Homerbot: www.homerweb.com", - "Honda-Search/0.7.2 (Nutch; http://lucene.apache.org/nutch/bot.html; search@honda-search.com)", - "HooWWWer/2.1.3 (debugging run) (+http://cosco.hiit.fi/search/hoowwwer/ | mailto:crawler-infohiit.fi)", - "HooWWWer/2.1.x ( http://cosco.hiit.fi/search/hoowwwer/ | mailto:crawler-infohiit.fi)", - "HPL/Nutch-0.9 -", - "htdig/3.1.6 (http://computerorgs.com)", - "htdig/3.1.6 (unconfigured@htdig.searchengine.maintainer)", - "htdig/3.1.x (root@localhost)", - "http://Ask.24x.Info/ (http://narres.it/)", - "http://hilfe.acont.de/bot.html ACONTBOT", - "http://www.almaden.ibm.com/cs/crawler", - "http://www.almaden.ibm.com/cs/crawler [rc1.wf.ibm.com]", - "http://www.almaden.ibm.com/cs/crawler [wf216]", - "http://www.istarthere.com_spider@istarthere.com", - "http://www.monogol.de", - "http://www.trendtech.dk/spider.asp)", - "i1searchbot/2.0 (i1search web crawler; http://www.i1search.com; crawler@i1search.com)", - "IAArchiver-1.0", - "iaskspider2 (iask@staff.sina.com.cn)", - "ia_archiver", - "ia_archiver-web.archive.org", - "ia_archiver/1.6", - "ICC-Crawler(Mozilla-compatible; http://kc.nict.go.jp/icc/crawl.html; icc-crawl(at)ml(dot)nict(dot)go(dot)jp)", - "ICC-Crawler(Mozilla-compatible;http://kc.nict.go.jp/icc/crawl.html;icc-crawl-contact(at)ml(dot)nict(dot)go(dot)jp)", - "iCCrawler (http://www.iccenter.net)", - "ICCrawler - ICjobs (http://www.icjobs.de/bot.htm)", - "ichiro/x.0 (http://help.goo.ne.jp/door/crawler.html)", - "ichiro/x.0 (ichiro@nttr.co.jp)", - "IconSurf/2.0 favicon finder (see http://iconsurf.com/robot.html)", - "IconSurf/2.0 favicon monitor (see http://iconsurf.com/robot.html)", - "ICRA_label_spider/x.0", - "icsbot-0.1", - "ideare - SignSite/1.x", - "iFeed.jp/2.0 (www.psychedelix.com/agents/agents.rss; 0 subscribers)", - "igdeSpyder (compatible; igde.ru; +http://igde.ru/doc/tech.html)", - "IIITBOT/1.1 (Indian Language Web Search Engine; http://webkhoj.iiit.net; pvvpr at iiit dot ac dot in)", - "ilial/Nutch-0.9 (Ilial, Inc. is a Los Angeles based Internet startup company. For more information please visit http://www.ilial.com/crawler; http://www.ilial.com/crawler; crawl@ilial.com)", - "ilial/Nutch-0.9-dev", - "IlseBot/1.x", - "IlTrovatore-Setaccio ( http://www.iltrovatore.it)", - "Iltrovatore-Setaccio/0.3-dev (Indexing; http://www.iltrovatore.it/bot.html; info@iltrovatore.it)", - "IlTrovatore-Setaccio/1.2 ( http://www.iltrovatore.it/aiuto/faq.html)", - "Iltrovatore-Setaccio/1.2 (It-bot; http://www.iltrovatore.it/bot.html; info@iltrovatore.it)", - "iltrovatore-setaccio/1.2-dev (spidering; http://www.iltrovatore.it/aiuto/.....)", - "IlTrovatore/1.2 (IlTrovatore; http://www.iltrovatore.it/bot.html; bot@iltrovatore.it)", - "ImageWalker/2.0 (www.bdbrandprotect.com)", - "IncyWincy data gatherer(webmaster@loopimprovements.com", - "IncyWincy page crawler(webmaster@loopimprovements.com", - "IncyWincy(http://www.look.com)", - "IncyWincy(http://www.loopimprovements.com/robot.html)", - "IncyWincy/2.1(loopimprovements.com/robot.html)", - "IndexTheWeb.com Crawler7", - "Industry Program 1.0.x", - "Inet library", - "info@pubblisito.com- (http://www.pubblisito.com) il Sud dei Motori di Ricerca", - "InfoFly/1.0 (http://www.versions-project.org/)", - "INFOMINE/8.0 Adders", - "INFOMINE/8.0 RemoteServices", - "INFOMINE/8.0 VLCrawler (http://infomine.ucr.edu/useragents)", - "InfoNaviRobot(F107)", - "InfoSeek Sidewinder/0.9", - "InfoSeek Sidewinder/1.0A", - "InfoSeek Sidewinder/1.1A", - "Infoseek SideWinder/1.45 (Compatible; MSIE 10.0; UNIX)", - "Infoseek SideWinder/2.0B (Linux 2.4 i686)", - "INGRID/3.0 MT (webcrawler@NOSPAMexperimental.net; http://webmaster.ilse.nl/jsp/webmaster.jsp)", - "Inktomi Search", - "InnerpriseBot/1.0 (http://www.innerprise.com/)", - "Insitor.com search and find world wide!", - "Insitornaut", - "Internet Ninja x.0", - "InternetArchive/0.8-dev(Nutch;http://lucene.apache.org/nutch/bot.html;nutch-agent@lucene.apache", - "InternetSeer.com", - "IOI/2.0 (ISC Open Index crawler; http://index.isc.org/; bot@index.isc.org)", - "IPiumBot laurion(dot)com", - "IpselonBot/0.xx-beta (Ipselon; http://www.ipselon.com; ipselonbot@ipselon.com)", - "IRLbot/1.0 ( http://irl.cs.tamu.edu/crawler)", - "IRLbot/3.0 (compatible; MSIE 6.0; http://irl.cs.tamu.edu/crawler/)", - "ISC Systems iRc Search 2.1", - "IUPUI Research Bot v 1.9a", - "IWAgent/ 1.0 - www.brandprotect.com", - "Jabot/6.x (http://odin.ingrid.org/)", - "Jabot/7.x.x (http://odin.ingrid.org/)", - "Jack", - "Jambot/0.1.x (Jambot; http://www.jambot.com/blog; crawler@jambot.com)", - "Jambot/0.2.1 (Jambot; http://www.jambot.com/blog/static.php?page=webmaster-robot; crawler@jambot.com)", - "Jayde Crawler. http://www.jayde.com", - "Jetbot/1.0", - "JobSpider_BA/1.1", - "Jyxobot/x", - "k2spider", - "KAIST AITrc Crawler", - "KakleBot - www.kakle.com/0.1 (KakleBot - www.kakle.com; http:// www.kakle.com/bot.html; support@kakle.com)", - "kalooga/kalooga-4.0-dev-datahouse (Kalooga; http://www.kalooga.com; info@kalooga.com)", - "kalooga/KaloogaBot (Kalooga; http://www.kalooga.com/info.html?page=crawler; crawler@kalooga.com)", - "Kenjin Spider", - "Kevin http://dznet.com/kevin/", - "Kevin http://websitealert.net/kevin/", - "KE_1.0/2.0 libwww/5.2.8", - "KFSW-Bot (Version: 1.01 powered by KFSW www.kfsw.de)", - "kinja-imagebot (http://www.kinja.com/)", - "kinjabot (http://www.kinja.com)", - "KIT-Fireball/2.0", - "KIT-Fireball/2.0 (compatible; Mozilla 4.0; MSIE 5.5)", - "KnowItAll(knowitall@cs.washington.edu)", - "Knowledge.com/0.x", - "Krugle/Krugle,Nutch/0.8+ (Krugle web crawler; http://www.krugle.com/crawler/info.html; webcrawler@krugle.com)", - "KSbot/1.0 (KnowledgeStorm crawler; http://www.knowledgestorm.com/resources/content/crawler/index.html; crawleradmin@knowledgestorm.com)", - "kuloko-bot/0.x", - "kulokobot www.kuloko.com kuloko@backweave.com", - "kulturarw3/0.1", - "LapozzBot/1.4 ( http://robot.lapozz.com)", - "LapozzBot/1.5 (+http://robot.lapozz.hu)", - "larbin (samualt9@bigfoot.com)", - "LARBIN-EXPERIMENTAL (efp@gmx.net)", - "larbin_2.1.1 larbin2.1.1@somewhere.com", - "larbin_2.2.0 (crawl@compete.com)", - "larbin_2.2.1_de_Viennot (Laurent.Viennot@inria.fr)", - "larbin_2.2.2 (sugayama@lab7.kuis.kyoto-u.ac.jp)", - "larbin_2.2.2_guillaume (guillaume@liafa.jussieu.fr)", - "larbin_2.6.0 (larbin2.6.0@unspecified.mail)", - "larbin_2.6.1 (larbin2.6.1@unspecified.mail)", - "larbin_2.6.2 (hamasaki@grad.nii.ac.jp)", - "larbin_2.6.2 (larbin2.6.2@unspecified.mail)", - "larbin_2.6.2 (listonATccDOTgatechDOTedu)", - "larbin_2.6.2 (pimenas@systems.tuc.gr)", - "larbin_2.6.2 (tom@lemurconsulting.com)", - "larbin_2.6.2 (vitalbox1@hotmail.com)", - "larbin_2.6.3 (ltaa_web_crawler@groupes.epfl.ch)", - "larbin_2.6.3 (wgao@genieknows.com)", - "larbin_2.6.3_for_(http://cosco.hiit.fi/search/) tsilande@hiit.fi", - "larbin_2.6_basileocaml (basile.starynkevitch@cea.fr)", - "larbin_devel (http://pauillac.inria.fr/~ailleret/prog/larbin/)", - "lawinfo-crawler/Nutch-0.9-dev (Crawler for lawinfo.com pages; http://www.lawinfo.com; webmaster@lawinfo.com)", - "LECodeChecker/3.0 libgetdoc/1.0", - "LEIA/2.90", - "LEIA/3.01pr (LEIAcrawler; [SNIP])", - "LetsCrawl.com/1.0 +http://letscrawl.com/", - "LexiBot/1.00", - "Libby_1.1/libwww-perl/5.47", - "LibertyW (+http://www.lw01.com)", - "libWeb/clsHTTP -- hiongun@kt.co.kr", - "libwww-perl/5.41", - "libwww-perl/5.45", - "libwww-perl/5.48", - "libwww-perl/5.52 FP/2.1", - "libwww-perl/5.52 FP/4.0", - "libwww-perl/5.65", - "libwww-perl/5.800", - "libwww/5.3.2", - "LijitSpider/Nutch-0.9 (Reports crawler; http://www.lijit.com/; info(a)lijit(d)com)", - "Lincoln State Web Browser", - "linkbot", - "linknzbot", - "Links 2.0 (http://gossamer-threads.com/scripts/links/)", - "Links SQL (http://gossamer-threads.com/scripts/links-sql/)", - "LinkScan/11.0beta2 UnixShareware robot from Elsop.com (used by Indiafocus/Indiainfo)", - "LinkScan/9.0g Unix", - "LinkScan/x.x Unix", - "LiveTrans/Nutch-0.9 (maintainer: cobain at iis dot sinica dot edu dot tw; http://wkd.iis.sinica.edu.tw/LiveTrans/)", - "Llaut/1.0 (http://mnm.uib.es/~gallir/llaut/bot.html)", - "LMQueueBot/0.2", - "lmspider (lmspider@scansoft.com)", - "LNSpiderguy", - "LocalBot/1.0 ( http://www.localbot.co.uk/)", - "LocalcomBot/1.2.x ( http://www.local.com/bot.htm)", - "Lockstep Spider/1.0", - "Look.com", - "Lovel as 1.0 ( +http://www.everatom.com)", - "LTI/LemurProject Nutch Spider/Nutch-1.0-dev (lti crawler for CMU; http://www.lti.cs.cmu.edu; changkuk at cmu dot edu)", - "LTI/LemurProject Nutch Spider/Nutch-1.0-dev (Research spider using Nutch; http://www.lemurproject.org; mhoy@cs.cmu.edu)", - "lwp-trivial/1.32", - "lwp-trivial/1.34", - "lwp-trivial/1.34", - "LWP::Simple/5.22", - "LWP::Simple/5.36", - "LWP::Simple/5.48", - "LWP::Simple/5.50", - "LWP::Simple/5.51", - "LWP::Simple/5.53", - "LWP::Simple/5.63", - "LWP::Simple/5.803", - "Lycos_Spider_(modspider)", - "Lycos_Spider_(T-Rex)", - "Lynx/2.8.4rel.1 libwww-FM/2.14 SSL-MM/1.4.1 OpenSSL/0.9.6c (human-guided@lerly.net)", - "Mac Finder 1.0.xx", - "Mackster( http://www.ukwizz.com )", - "Mahiti.Com/Mahiti Crawler-1.0 (Mahiti.Com; http://mahiti.com ; mahiti.com)", - "Mail.Ru/1.0", - "mailto:webcraft@bea.com", - "mammoth/1.0 ( http://www.sli-systems.com/)", - "MantraAgent", - "MapoftheInternet.com ( http://MapoftheInternet.com)", - "Mariner/5.1b [de] (Win95; I ;Kolibri gncwebbot)", - "Marketwave Hit List", - "Martini", - "MARTINI", - "Marvin v0.3", - "MaSagool/1.0 (MaSagool; http://sagool.jp/; info@sagool.jp)", - "MasterSeek", - "Mata Hari/2.00 ", - "Matrix S.p.A. - FAST Enterprise Crawler 6 (Unknown admin e-mail address)", - "maxomobot/dev-20051201 (maxomo; http://67.102.134.34:4047/MAXOMO/MAXOMObot.html; maxomobot@maxomo.com)", - "MDbot/1.0 (+http://www.megadownload.net/bot.html)", - "MediaCrawler-1.0 (Experimental)", - "Mediapartners-Google/2.1 ( http://www.googlebot.com/bot.html)", - "MediaSearch/0.1", - "MegaSheep v1.0 (www.searchuk.com internet sheep)", - "Megite2.0 (http://www.megite.com)", - "Mercator-1.x", - "Mercator-2.0", - "Mercator-Scrub-1.1", - "Metaeuro Web Crawler/0.2 (MetaEuro Web Search Clustering Engine; http://www.metaeuro.com; crawler at metaeuro dot com)", - "MetaGer-LinkChecker", - "MetagerBot/0.8-dev (MetagerBot; http://metager.de; )", - "MetaGer_PreChecker0.1", - "Metaspinner/0.01 (Metaspinner; http://www.meta-spinner.de/; support@meta-spinner.de/)", - "metatagsdir/0.7 (+http://metatagsdir.com/directory/)", - "MFC Foundation Class Library 4.0", - "MicroBaz", - "Microsoft Small Business Indexer", - "Microsoft URL Control - 6.00.8xxx", - "MicrosoftPrototypeCrawler (How's my crawling? mailto:newbiecrawler@hotmail.com)", - "Missauga Locate 1.0.0", - "Missigua Locator 1.9", - "Missouri College Browse", - "Misterbot-Nutch/0.7.1 (Misterbot-Nutch; http://www.misterbot.fr; admin@misterbot.fr)", - "Miva (AlgoFeedback@miva.com)", - "Mizzu Labs 2.2", - "MJ12bot/vx.x.x (http://majestic12.co.uk/bot.php?+)", - "MJ12bot/vx.x.x (http://www.majestic12.co.uk/projects/dsearch/mj12bot.php)", - "MJBot (SEO assessment)", - "MLBot (www.metadatalabs.com)", - "MnogoSearch/3.2.xx", - "Mo College 1.9", - "moget/x.x (moget@goo.ne.jp)", - "mogimogi/1.0", - "MojeekBot/0.x (archi; http://www.mojeek.com/bot.html)", - "Morris - Mixcat Crawler ( http://mixcat.com)", - "Mouse-House/7.4 (spider_monkey spider info at www.mobrien.com/sm.shtml)", - "mozDex/0.xx-dev (mozDex; http://www.mozdex.com/en/bot.html; spider@mozdex.com)", - "Mozilla (Mozilla@somewhere.com)", - "Mozilla 4.0(compatible; BotSeer/1.0; +http://botseer.ist.psu.edu)", - "Mozilla/2.0 (compatible; Ask Jeeves)", - "Mozilla/2.0 (compatible; Ask Jeeves/Teoma)", - "Mozilla/2.0 (compatible; Ask Jeeves/Teoma; http://about.ask.com/en/docs/about/webmasters.shtml) ", - "Mozilla/2.0 (compatible; Ask Jeeves/Teoma; http://sp.ask.com/docs/about/tech_crawling.html)", - "Mozilla/2.0 (compatible; EZResult -- Internet Search Engine)", - "Mozilla/2.0 (compatible; NEWT ActiveX; Win32)", - "Mozilla/2.0 (compatible; T-H-U-N-D-E-R-S-T-O-N-E)", - "Mozilla/3.0 (compatible; Fluffy the spider; http://www.searchhippo.com/; info@searchhippo.com)", - "Mozilla/3.0 (compatible; Indy Library)", - "Mozilla/3.0 (compatible; MuscatFerret/1.5.4; claude@euroferret.com)", - "Mozilla/3.0 (compatible; MuscatFerret/1.5; olly@muscat.co.uk)", - "Mozilla/3.0 (compatible; MuscatFerret/1.6.x; claude@euroferret.com)", - "Mozilla/3.0 (compatible; scan4mail (advanced version) http://www.peterspages.net/?scan4mail)", - "Mozilla/3.0 (compatible; ScollSpider; http://www.webwobot.com)", - "Mozilla/3.0 (compatible; Webinator-DEV01.home.iprospect.com/2.56)", - "Mozilla/3.0 (compatible; Webinator-indexer.cyberalert.com/2.56)", - "Mozilla/3.0 (INGRID/3.0 MT; webcrawler@NOSPAMexperimental.net; http://aanmelden.ilse.nl/?aanmeld_mode=webhints)", - "Mozilla/3.0 (Slurp.so/Goo; slurp@inktomi.com; http://www.inktomi.com/slurp.html)", - "Mozilla/3.0 (Slurp/cat; slurp@inktomi.com; http://www.inktomi.com/slurp.html)", - "Mozilla/3.0 (Slurp/si; slurp@inktomi.com; http://www.inktomi.com/slurp.html)", - "Mozilla/3.0 (Vagabondo/1.1 MT; webcrawler@NOSPAMwise-guys.nl; http://webagent.wise-guys.nl/)", - "Mozilla/3.0 (Vagabondo/1.x MT; webagent@wise-guys.nl; http://webagent.wise-guys.nl/)", - "Mozilla/3.0 (Vagabondo/2.0 MT; webcrawler@NOSPAMexperimental.net; http://aanmelden.ilse.nl/?aanmeld_mode=webhints)", - "Mozilla/3.0 (Vagabondo/2.0 MT; webcrawler@NOSPAMwise-guys.nl; http://webagent.wise-guys.nl/)", - "Mozilla/3.01 (Compatible; Links2Go Similarity Engine)", - "Mozilla/4.0", - "Mozilla/4.0 (agadine3.0) www.agada.de", - "Mozilla/4.0 (compatible: AstraSpider V.2.1 : astrafind.com)", - "Mozilla/4.0 (compatible; Vagabondo/2.2; webcrawler at wise-guys dot nl; http://webagent.wise-guys.nl/)", - "Mozilla/4.0 (compatible; Vagabondo/4.0Beta; webcrawler at wise-guys dot nl; http://webagent.wise-guys.nl/)", - "Mozilla/4.0 (compatible; Advanced Email Extractor v2.xx)", - "Mozilla/4.0 (compatible; B_L_I_T_Z_B_O_T)", - "Mozilla/4.0 (compatible; ChristCrawler.com ChristCrawler@ChristCENTRAL.com)", - "Mozilla/4.0 (compatible; crawlx, crawler@trd.overture.com)", - "Mozilla/4.0 (compatible; DAUMOA-video; +http://ws.daum.net/aboutkr.html)", - "Mozilla/4.0 (compatible; FastCrawler3 support-fastcrawler3@fast.no)", - "Mozilla/4.0 (compatible; FDSE robot)", - "Mozilla/4.0 (compatible; GPU p2p crawler http://gpu.sourceforge.net/search_engine.php)", - "Mozilla/4.0 (compatible; grub-client-0.2.x; Crawl your stuff with http://grub.org)", - "Mozilla/4.0 (compatible; grub-client-0.3.x; Crawl your own stuff with http://grub.org)", - "Mozilla/4.0 (compatible; grub-client-2.x)", - "Mozilla/4.0 (compatible; Iplexx Spider/1.0 http://www.iplexx.at)", - "Mozilla/4.0 (compatible; MSIE 4.01; Vonna.com b o t)", - "Mozilla/4.0 (compatible; MSIE 4.01; Windows CE; PPC; 240x320; SPV M700; OpVer 19.123.2.733) OrangeBot-Mobile 2008.0 (mobilesearch.support@orange-ftgroup.com)", - "Mozilla/4.0 (compatible; MSIE 4.0; Windows NT; Site Server 3.0 Robot) Indonesia Interactive", - "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0) (samualt9@bigfoot.com)", - "Mozilla/4.0 (compatible; MSIE 5.0; NetNose-Crawler 2.0; A New Search Experience: http://www.netnose.com)", - "Mozilla/4.0 (compatible; MSIE 5.0; Windows 95) TrueRobot; 1.5", - "Mozilla/4.0 (compatible; MSIE 5.0; Windows 95) VoilaBot BETA 1.2 (http://www.voila.com/)", - "Mozilla/4.0 (compatible; MSIE 5.0; Windows 95) VoilaBot; 1.6", - "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt; DTS Agent", - "Mozilla/4.0 (compatible; MSIE 5.0; www.galaxy.com; www.psychedelix.com)", - "Mozilla/4.0 (compatible; MSIE 5.0; www.galaxy.com; www.psychedelix.com/; http://www.galaxy.com/info/crawler.html)", - "Mozilla/4.0 (compatible; MSIE 5.0; YANDEX)", - "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 4.0; obot)", - "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 4.0; QXW03018)", - "Mozilla/4.0 (compatible; MSIE 6.0 compatible; Asterias Crawler v4; +http://www.singingfish.com/help/spider.html; webmaster@singingfish.com); SpiderThread Revision: 3.10", - "Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 5.1) Skampy/0.9.x [en]", - "Mozilla/4.0 (compatible; MSIE 6.0; TargetSeek/1.0; +http://www.targetgroups.net/TargetSeek.html)", - "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; ODP entries t_st; http://tuezilla.de/t_st-odp-entries-agent.html)", - "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; ODP links test; http://tuezilla.de/test-odp-links-agent.html)", - "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; ZoomSpider.net bot; .NET CLR 1.1.4322)", - "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; heritrix/1.3.0 http://www.cs.washington.edu/research/networking/websys/)", - "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; QihooBot 1.0 qihoobot@qihoo.net)", - "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT; MS Search 4.0 Robot)", - "Mozilla/4.0 (compatible; MSIE enviable; DAUMOA 2.0; DAUM Web Robot; Daum Communications Corp., Korea; +http://ws.daum.net/aboutkr.html)", - "Mozilla/4.0 (compatible; MSIE is not me; DAUMOA/1.0.1; DAUM Web Robot; Daum Communications Corp., Korea)", - "Mozilla/4.0 (compatible; NaverBot/1.0; http://help.naver.com/delete_main.asp)", - "Mozilla/4.0 (compatible; SpeedySpider; www.entireweb.com)", - "Mozilla/4.0 (compatible; www.galaxy.com)", - "Mozilla/4.0 (compatible; Y!J; for robot study; keyoshid)", - "Mozilla/4.0 (compatible; Yahoo Japan; for robot study; kasugiya)", - "Mozilla/4.0 (JemmaTheTourist;http://www.activtourist.com)", - "Mozilla/4.0 (MobilePhone SCP-5500/US/1.0) NetFront/3.0 MMP/2.0 (compatible; Googlebot/2.1; http://www.google.com/bot.html)", - "Mozilla/4.0 (MobilePhone SCP-5500/US/1.0) NetFront/3.0 MMP/2.0 FAKE (compatible; Googlebot/2.1; http://www.google.com/bot.html)", - "Mozilla/4.0 (Mozilla; http://www.mozilla.org/docs/en/bot.html; master@mozilla.com)", - "Mozilla/4.0 (Sleek Spider/1.2)", - "Mozilla/4.0 compatible FurlBot/Furl Search 2.0 (FurlBot; http://www.furl.net; wn.furlbot@looksmart.net)", - "Mozilla/4.0 compatible ZyBorg/1.0 (wn.zyborg@looksmart.net; http://www.WISEnutbot.com)", - "Mozilla/4.0 compatible ZyBorg/1.0 (ZyBorg@WISEnutbot.com; http://www.WISEnutbot.com)", - "Mozilla/4.0 compatible ZyBorg/1.0 Dead Link Checker (wn.zyborg@looksmart.net; http://www.WISEnutbot.com)", - "Mozilla/4.0 compatible ZyBorg/1.0 for Homepage (ZyBorg@WISEnutbot.com; http://www.WISEnutbot.com)", - "Mozilla/4.0 efp@gmx.net", - "Mozilla/4.0 [en] (Ask Jeeves Corporate Spider)", - "Mozilla/4.0(compatible; Zealbot 1.0)", - "Mozilla/4.04 (compatible; Dulance bot; +http://www.dulance.com/bot.jsp)", - "Mozilla/4.0_(compatible;_MSIE_5.0;_Windows_95)_TrueRobot/1.4 libwww/5.2.8", - "Mozilla/4.0_(compatible;_MSIE_5.0;_Windows_95)_VoilaBot/1.6 libwww/5.3.2", - "Mozilla/4.6 [en] (http://www.cnet.com/)", - "Mozilla/4.7", - "Mozilla/4.7 (compatible; http://eidetica.com/spider)", - "Mozilla/4.7 (compatible; Intelliseek; http://www.intelliseek.com)", - "Mozilla/4.7 (compatible; Whizbang)", - "Mozilla/4.7 (compatible; WhizBang; http://www.whizbang.com/crawler)", - "Mozilla/4.7 [en](BecomeBot@exava.com)", - "Mozilla/4.7 [en](Exabot@exava.com)", - "Mozilla/4.72 [en] (BACS http://www.ba.be)", - "Mozilla/5.0", - "Mozilla/5.0 (+http://www.eurekster.com/mammoth) Mammoth/0.1", - "Mozilla/5.0 (+http://www.sli-systems.com/) Mammoth/0.1", - "Mozilla/5.0 (Clustered-Search-Bot/1.0; support@clush.com; http://www.clush.com/)", - "Mozilla/5.0 (compatible; +http://www.evri.com/evrinid)", - "Mozilla/5.0 (compatible; 008/0.83; http://www.80legs.com/spider.html;) Gecko/2008032620", - "Mozilla/5.0 (compatible; Abonti/0.8 - http://www.abonti.com)", - "Mozilla/5.0 (compatible; aiHitBot/1.0; +http://www.aihit.com/)", - "Mozilla/5.0 (compatible; AnsearchBot/1.x; +http://www.ansearch.com.au/)", - "Mozilla/5.0 (compatible; archive.org_bot/1.10.0 +http://www.loc.gov/minerva/crawl.html)", - "Mozilla/5.0 (compatible; archive.org_bot/1.13.1x http://crawler.archive.org)", - "Mozilla/5.0 (compatible; archive.org_bot/1.5.0-200506132127 http://crawler.archive.org) Hurricane Katrina", - "Mozilla/5.0 (compatible; Ask Jeeves/Teoma; http://about.ask.com/en/docs/about/webmasters.shtml)", - "Mozilla/5.0 (compatible; BecomeBot/1.23; http://www.become.com/webmasters.html)", - "Mozilla/5.0 (compatible; BecomeBot/1.xx; MSIE 6.0 compatible; http://www.become.com/webmasters.html)", - "Mozilla/5.0 (compatible; BecomeBot/2.0beta; http://www.become.com/webmasters.html)", - "Mozilla/5.0 (compatible; BecomeBot/2.x; MSIE 6.0 compatible; http://www.become.com/site_owners.html)", - "Mozilla/5.0 (compatible; BecomeJPBot/2.3; MSIE 6.0 compatible; +http://www.become.co.jp/site_owners.html)", - "Mozilla/5.0 (compatible; BlogRefsBot/0.1; http://www.blogrefs.com/about/bloggers)", - "Mozilla/5.0 (compatible; Bot; +http://pressemitteilung.ws/spamfilter", - "Mozilla/5.0 (compatible; BuzzRankingBot/1.0; +http://www.buzzrankingbot.com/)", - "Mozilla/5.0 (compatible; Charlotte/1.0b; charlotte@betaspider.com)", - "Mozilla/5.0 (compatible; Charlotte/1.0b; http://www.searchme.com/support/)", - "Mozilla/5.0 (compatible; Crawling jpeg; http://www.yama.info.waseda.ac.jp)", - "Mozilla/5.0 (compatible; de/1.13.2 +http://www.de.com)", - "Mozilla/5.0 (compatible; Diffbot/0.1; +http://www.diffbot.com)", - "Mozilla/5.0 (compatible; DNS-Digger-Explorer/1.0; +http://www.dnsdigger.com)", - "Mozilla/5.0 (compatible; DNS-Digger/1.0; +http://www.dnsdigger.com)", - "Mozilla/5.0 (compatible; EARTHCOM.info/2.01; http://www.earthcom.info)", - "Mozilla/5.0 (compatible; EARTHCOM/2.2; +http://enter4u.eu)", - "Mozilla/5.0 (compatible; Exabot Test/3.0; +http://www.exabot.com/go/robot)", - "Mozilla/5.0 (compatible; FatBot 2.0; http://www.thefind.com/main/CrawlerFAQs.fhtml)", - "Mozilla/5.0 (compatible; Galbot/1.0; +http://www.galbot.com/bot.html)", - "mozilla/5.0 (compatible; genevabot http://www.healthdash.com)", - "Mozilla/5.0 (compatible; Googlebot/2.1; http://www.google.com/bot.html)", - "mozilla/5.0 (compatible; heritrix/1.0.4 http://innovationblog.com)", - "Mozilla/5.0 (compatible; heritrix/1.10.2 +http://i.stanford.edu/)", - "Mozilla/5.0 (compatible; heritrix/1.12.1 +http://newstin.com/)", - "Mozilla/5.0 (compatible; heritrix/1.12.1 +http://www.page-store.com)", - "Mozilla/5.0 (compatible; heritrix/1.12.1 +http://www.page-store.com) [email:paul@page-store.com]", - "mozilla/5.0 (compatible; heritrix/1.3.0 http://archive.crawler.org)", - "Mozilla/5.0 (compatible; heritrix/1.4.0 +http://www.chepi.net)", - "Mozilla/5.0 (compatible; heritrix/1.4t http://www.truveo.com/)", - "Mozilla/5.0 (compatible; heritrix/1.5.0 http://www.l3s.de/~kohlschuetter/projects/crawling/)", - "Mozilla/5.0 (compatible; heritrix/1.5.0-200506231921 http://pandora.nla.gov.au/crawl.html)", - "Mozilla/5.0 (compatible; heritrix/1.6.0 http://www.worio.com/)", - "Mozilla/5.0 (compatible; heritrix/1.7.0 +http://www.greaterera.com/)", - "Mozilla/5.0 (compatible; heritrix/1.x.x +http://www.accelobot.com)", - "Mozilla/5.0 (compatible; heritrix/2.0.0-RC1 +http://www.aol.com)", - "Mozilla/5.0 (compatible; Hermit Search. Com; +http://www.hermitsearch.com)", - "Mozilla/5.0 (compatible; HyperixScoop/1.3; +http://www.hyperix.com)", - "Mozilla/5.0 (compatible; IDBot/1.0; +http://www.id-search.org/bot.html)", - "Mozilla/5.0 (compatible; InterseekWeb/3.x)", - "Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Exabot-Thumbnails)", - "Mozilla/5.0 (compatible; LemSpider 0.1)", - "Mozilla/5.0 (compatible; MojeekBot/2.0; http://www.mojeek.com/bot.html)", - "Mozilla/5.0 (compatible; MSIE 6.0; Podtech Network; crawler_admin@podtech.net)", - "Mozilla/5.0 (compatible; OnetSzukaj/5.0; http://szukaj.onet.pl)", - "Mozilla/5.0 (compatible; PalmeraBot; http://www.links24h.com/help/palmera) Version 0.001", - "Mozilla/5.0 (compatible; pogodak.ba/3.x)", - "Mozilla/5.0 (compatible; Pogodak.hr/3.1)", - "Mozilla/5.0 (compatible; PWeBot/3.1; http://www.programacionweb.net/robot.php)", - "Mozilla/5.0 (compatible; Quantcastbot/1.0; www.quantcast.com)", - "Mozilla/5.0 (compatible; ScoutJet; +http://www.scoutjet.com/)", - "Mozilla/5.0 (compatible; Scrubby/2.2; http://www.scrubtheweb.com/)", - "Mozilla/5.0 (compatible; ShunixBot/1.x.x +http://www.shunix.com/robot.htm)", - "Mozilla/5.0 (compatible; ShunixBot/1.x; http://www.shunix.com/bot.htm)", - "Mozilla/5.0 (compatible; SkreemRBot +http://skreemr.com)", - "Mozilla/5.0 (compatible; SummizeBot +http://www.summize.com)", - "Mozilla/5.0 (compatible; Synoobot/0.9; http://www.synoo.com/search/bot.html)", - "Mozilla/5.0 (compatible; Theophrastus/x.x; http://users.cs.cf.ac.uk/N.A.Smith/theophrastus.php)", - "Mozilla/5.0 (compatible; TridentSpider/3.1)", - "Mozilla/5.0 (compatible; Vagabondo/2.1; webcrawler at wise-guys dot nl; http://webagent.wise-guys.nl/)", - "Mozilla/5.0 (compatible; Webduniabot/1.0; +http://search.webdunia.com/bot.aspx)", - "Mozilla/5.0 (compatible; worio bot heritrix/1.10.0 +http://worio.com)", - "Mozilla/5.0 (compatible; WoW Lemmings Kathune/2.0;http://www.wowlemmings.com/kathune.html)", - "Mozilla/5.0 (compatible; Yahoo! DE Slurp; http://help.yahoo.com/help/us/ysearch/slurp)", - "Mozilla/5.0 (compatible; Yahoo! Slurp China; http://misc.yahoo.com.cn/help.html)", - "Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)", - "Mozilla/5.0 (compatible; Yoono; http://www.yoono.com/)", - "Mozilla/5.0 (compatible; YoudaoBot/1.0; http://www.youdao.com/help/webmaster/spider/; )", - "Mozilla/5.0 (compatible; Zenbot/1.3; +http://zen.co.za/webmasters/)", - "Mozilla/5.0 (compatible; zermelo +http://www.powerset.com) [email:paul@page-store.com,crawl@powerset.com]", - "Mozilla/5.0 (compatible;archive.org_bot/1.7.1; collectionId=316; Archive-It; +http://www.archive-it.org)", - "Mozilla/5.0 (compatible;archive.org_bot/heritrix-1.9.0-200608171144 +http://pandora.nla.gov.au/crawl.html)", - "Mozilla/5.0 (compatible;MAINSEEK_BOT)", - "Mozilla/5.0 (Slurp/cat; slurp@inktomi.com; http://www.inktomi.com/slurp.html)", - "Mozilla/5.0 (Slurp/si; slurp@inktomi.com; http://www.inktomi.com/slurp.html)", - "Mozilla/5.0 (Twiceler-0.9 http://www.cuill.com/twiceler/robot.html)", - "Mozilla/5.0 (Version: xxxx Type:xx)", - "Mozilla/5.0 (wgao@genieknows.com)", - "Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.7.7) NimbleCrawler 1.11 obeys UserAgent NimbleCrawler For problems contact: crawler_at_dataalchemy.com", - "Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.8.1) VoilaBot BETA 1.2 (support.voilabot@orange-ftgroup.com)", - "Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.8.1) VoilaBot BETA 1.2 (support.voilabot@orange-ftgroup.com)", - "Mozilla/5.0 (Windows;) NimbleCrawler 1.12 obeys UserAgent NimbleCrawler For problems contact: crawler@health", - "Mozilla/5.0 (Windows;) NimbleCrawler 1.12 obeys UserAgent NimbleCrawler For problems contact: crawler@healthline.com", - "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2.1; aggregator:Spinn3r (Spinn3r 3.1); http://spinn3r.com/robot) Gecko/20021130", - "Mozilla/5.0 URL-Spider", - "Mozilla/5.0 usww.com-Spider-for-w8.net", - "Mozilla/5.0 wgao@genieknows.com", - "Mozilla/5.0 [en] (compatible; Gulper Web Bot 0.2.4 www.ecsl.cs.sunysb.edu/~maxim/cgi-bin/Link/GulperBot)", - "MQbot metaquerier.cs.uiuc.edu/crawler", - "MQBOT/Nutch-0.9-dev (MQBOT Nutch Crawler; http://falcon.cs.uiuc.edu; mqbot@cs.uiuc.edu)", - "msnbot-media/1.0 (+http://search.msn.com/msnbot.htm)", - "msnbot-Products/1.0 (+http://search.msn.com/msnbot.htm)", - "MSNBOT/0.xx (http://search.msn.com/msnbot.htm)", - "msnbot/x.xx ( http://search.msn.com/msnbot.htm)", - "MSNBOT_Mobile MSMOBOT Mozilla/2.0 (compatible; MSIE 4.02; Windows CE; Default)", - "MSNPTC/1.0", - "MSRBOT (http://research.microsoft.com/research/sv/msrbot)", - "multicrawler ( http://sw.deri.org/2006/04/multicrawler/robots.html)", - "MultiText/0.1", - "MusicWalker2.0 ( http://www.somusical.com)", - "MVAClient", - "Mylinea.com Crawler 2.0", - "Naamah 1.0.1/Blogbot (http://blogbot.de/)", - "Naamah 1.0a/Blogbot (http://blogbot.de/)", - "NABOT/5.0", - "nabot_1.0", - "NameOfAgent (CMS Spider)", - "NASA Search 1.0", - "NationalDirectory-WebSpider/1.3", - "NationalDirectoryAddURL/1.0", - "NaverBot-1.0 (NHN Corp. / +82-2-3011-1954 / nhnbot@naver.com)", - "NaverBot_dloader/1.5", - "NavissoBot", - "NavissoBot/1.7 (+http://navisso.com/)", - "NCSA Beta 1 (http://vias.ncsa.uiuc.edu/viasarchivinginformation.html)", - "Nebullabot/2.2 (http://bot.nebulla.info)", - "NEC Research Agent -- compuman at research.nj.nec.com", - "Net-Seekr Bot/Net-Seekr Bot V1 (http://www.net-seekr.com)", - "NetinfoBot/1.0 (http://netinfo.bg/netinfobot.html)", - "NetLookout/2.24", - "Netluchs/0.8-dev ( ; http://www.netluchs.de/; ___don't___spam_me_@netluchs.de)", - "NetNoseCrawler/v1.0", - "Netprospector JavaCrawler", - "NetResearchServer(http://www.look.com)", - "NetResearchServer/x.x(loopimprovements.com/robot.html)", - "NetSeer/Nutch-0.9 (NetSeer Crawler; http://www.netseer.com; crawler@netseer.com)", - "NetSprint -- 2.0", - "NetWhatCrawler/0.06-dev (NetWhatCrawler from NetWhat.com; http://www.netwhat.com; support@netwhat.com)", - "NetZippy", - "NextGenSearchBot 1 (for information visit http://www.eliyon.com/NextGenSearchBot)", - "NextopiaBOT (+http://www.nextopia.com) distributed crawler client beta v0.x", - "NG-Search/0.90 (NG-SearchBot; http://www.ng-search.com; )", - "NG/1.0", - "NG/4.0.1229", - "NITLE Blog Spider/0.01", - "Noago Spider", - "Nokia-WAPToolkit/1.2 googlebot(at)googlebot.com", - "Nokia6610/1.0 (3.09) Profile/MIDP-1.0 Configuration/CLDC-1.0 (compatible;YahooSeeker/M1A1-R2D2; http://help.yahoo.com/help/us/ysearch/crawling/crawling-01.html)", - "NokodoBot/1.x (+http://nokodo.com/bot.htm)", - "Norbert the Spider(Burf.com)", - "noxtrumbot/1.0 (crawler@noxtrum.com)", - "noyona_0_1", - "NP/0.1 (NP; http://www.nameprotect.com; npbot@nameprotect.com)", - "NPBot (http://www.nameprotect.com/botinfo.html)", - "NPBot-1/2.0", - "Nsauditor/1.x", - "nsyght.com/Nutch-1.0-dev (nsyght.com; Nsyght.com)", - "nsyght.com/Nutch-x.x (nsyght.com; search.nsyght.com)", - "nttdirectory_robot/0.9 (super-robot@super.navi.ocn.ne.jp)", - "nuSearch Spider www.nusearch.com (compatible; MSIE 4.01)", - "NuSearch Spider (compatible; MSIE 6.0)", - "NuSearch Spider www.nusearch.com", - "Nutch", - "Nutch crawler/Nutch-0.9 (picapage.com; admin@picapage.com)", - "Nutch/Nutch-0.9 (Eurobot; http://www.ayell.eu )", - "NutchCVS/0.0x-dev (Nutch; http://www.nutch.org/docs/bot.html; nutch-agent@lists.sourceforge.net)", - "NutchCVS/0.7.1 (Nutch running at UW; http://www.nutch.org/docs/en/bot.html; sycrawl@cs.washington.edu)", - "NutchEC2Test/Nutch-0.9-dev (Testing Nutch on Amazon EC2.; http://lucene.apache.org/nutch/bot.html; ec2test at lucene.com)", - "NutchOrg/0.0x-dev (Nutch; http://www.nutch.org/docs/bot.html; nutch-agent@lists.sourceforge.net)", - "nutchsearch/Nutch-0.9 (Nutch Search 1.0; herceg_novi at yahoo dot com)", - "NutchVinegarCrawl/Nutch-0.8.1 (Vinegar; http://www.cs.washington.edu; eytanadar at gmail dot com)", - "obidos-bot (just looking for books.)", - "ObjectsSearch/0.01-dev (ObjectsSearch;http://www.ObjectsSearch.com/bot.html; support@thesoftwareobjects.com)", - "ObjectsSearch/0.0x (ObjectsSearch; http://www.ObjectsSearch.com/bot.html; support@thesoftwareobjects.com)", - "oBot ((compatible;Win32))", - "Ocelli/1.x (http://www.globalspec.com/Ocelli)", - "Octora Beta - www.octora.com", - "Octora Beta Bot - www.octora.com", - "OmniExplorer_Bot/1.0x (+http://www.omni-explorer.com) Internet CategorizerOmniExplorer http://www.omni-explorer.com/ car & shopping search (64.62.175.xxx)", - "OmniExplorer_Bot/1.0x (+http://www.omni-explorer.com) Job Crawler", - "OmniExplorer_Bot/1.1x (+http://www.omni-explorer.com) Torrent Crawler", - "OmniExplorer_Bot/x.xx (+http://www.omni-explorer.com) WorldIndexer", - "Onet.pl SA- http://szukaj.onet.pl", - "OntoSpider/1.0 libwww-perl/5.65", - "OOZBOT/0.20 ( http://www.setooz.com/oozbot.html ; agentname at setooz dot_com )", - "OpenAcoon v4.0.x (www.openacoon.de)", - "Openbot/3.0+(robot-response@openfind.com.tw;+http://www.openfind.com.tw/robot.html)", - "Openfind data gatherer- Openbot/3.0+(robot-response@openfind.com.tw;+http://www.openfind.com.tw/robot.html)", - "Openfind Robot/1.1A2", - "OpenISearch/1.x (www.openisearch.com)", - "OpenTaggerBot (http://www.opentagger.com/opentaggerbot.htm)", - "OpenTextSiteCrawler/2.9.2", - "OpenWebSpider/0.x.x (http://www.openwebspider.org)", - "OpenWebSpider/x", - "OpidooBOT (larbin2.6.3@unspecified.mail)", - "Oracle Ultra Search", - "OrangeSpider", - "Orbiter/T-2.0 (+http://www.dailyorbit.com/bot.htm)", - "Overture-WebCrawler/3.8/Fresh (atw-crawler at fast dot no; http://fast.no/support/crawler.asp)", - "ozelot/2.7.3 (Search engine indexer; www.flying-cat.de/ozelot; ozelot@flying-cat.de)", - "PADLibrary Spider", - "PageBitesHyperBot/600 (http://www.pagebites.com/)", - "Pagebull http://www.pagebull.com/", - "page_verifier (http://www.securecomputing.com/goto/pv)", - "parallelContextFocusCrawler1.1parallelContextFocusCrawler1.1", - "ParaSite/1.0b (http://www.ianett.com/parasite/)", - "Patwebbot (http://www.herz-power.de/technik.html)", - "PBrowse 1.4b", - "pd02_1.0.0 pd02_1.0.0@dzimi@post.sk", - "PEERbot www.peerbot.com", - "PEval 1.4b", - "PicoSearch/1.0", - "Piffany_Web_Scraper_v0.x", - "Piffany_Web_Spider_v0.x", - "pipeLiner/0.3a (PipeLine Spider;http://www.pipeline-search.com/webmaster.html; webmaster'at'pipeline-search.com)", - "pipeLiner/0.xx (PipeLine Spider; http://www.pipeline-search.com/webmaster.html)", - "Pita", - "PJspider/3.0 (pjspider@portaljuice.com; http://www.portaljuice.com)", - "PlagiarBot/1.0", - "PluckFeedCrawler/2.0 (compatible; Mozilla 4.0; MSIE 5.5; http://www.pluck.com; 1 subscribers)", - "Pluggd/Nutch-0.9 (automated crawler http://www.pluggd.com;support at pluggd dot com)", - "Poirot", - "polybot 1.0 (http://cis.poly.edu/polybot/)", - "Pompos/1.x http://dir.com/pompos.html", - "Pompos/1.x pompos@iliad.fr", - "Popdexter/1.0", - "Port Huron Labs", - "PortalBSpider/2.0 (spider@portalb.com)", - "potbot 1.0", - "PRCrawler/Nutch-0.9 (data mining development project; crawler@projectrialto.com)", - "PrivacyFinder Cache Bot v1.0", - "PrivacyFinder/1.1", - "Production Bot 0116B", - "Production Bot 2016B", - "Production Bot DOT 3016B", - "Program Shareware 1.0.2", - "Project XP5 [2.03.07-111203]", - "PROve AnswerBot 4.0", - "ProWebGuide Link Checker (http://www.prowebguide.com)", - "psbot/0.1 (+http://www.picsearch.com/bot.html)", - "PSurf15a 11", - "PSurf15a 51", - "PSurf15a VA", - "psycheclone", - "PubCrawl (pubcrawl.stanford.edu)", - "pulseBot (pulse Web Miner)", - "PWeBot/1.2 Inspector (http://www.programacionweb.net/robot.php)", - "PycURL", - "Python-urllib/1.1x", - "Python-urllib/2.0a1", - "Qango.com Web Directory (http://www.qango.com/)", - "QEAVis Agent/Nutch-0.9 (Quantitative Evaluation of Academic Websites Visibility; http://nlp.uned.es/qeavis", - "QPCreep Test Rig ( We are not indexing- just testing )", - "QuepasaCreep ( crawler@quepasacorp.com )", - "QuepasaCreep v0.9.1x", - "QueryN Metasearch", - "QweeryBot/3.01 ( http://qweerybot.qweery.nl)", - "Qweery_robot.txt_CheckBot/3.01 (http://qweerybot.qweery.com)", - "R6_CommentReader_(www.radian6.com/crawler)", - "R6_FeedFetcher_(www.radian6.com/crawler)", - "rabaz (rabaz at gigabaz dot com)", - "RaBot/1.0 Agent-admin/phortse@hanmail.net", - "ramBot xtreme x.x", - "RAMPyBot - www.giveRAMP.com/0.1 (RAMPyBot - www.giveRAMP.com; http://www.giveramp.com/bot.html; support@giveRAMP.com)", - "RAMPyBot/0.8-dev (Nutch; http://lucene.apache.org/nutch/bot.html; nutch-agent@lucene.apache.org)", - "Rankivabot/3.2 (www.rankiva.com; 3.2; vzmxikn)", - "Rational SiteCheck (Windows NT)", - "Reaper [2.03.10-031204] (http://www.sitesearch.ca/reaper/)", - "Reaper/2.0x (+http://www.sitesearch.ca/reaper)", - "RedCarpet/1.2 (http://www.redcarpet-inc.com/robots.html)", - "RedCell/0.1 (InfoSec Search Bot (Coming Soon); http://www.telegenetic.net/bot.html; lhall@telegenetic.net)", - "RedCell/0.1 (RedCell; telegenetic.net/bot.html; lhall_at_telegenetic.net)", - "RedKernel WWW-Spider 2/0 (+http://www-spider.redkernel-softwares.com/)", - "rico/0.1", - "RixBot (http://babelserver.org/rix)", - "RoboCrawl (http://www.canadiancontent.net)", - "RoboCrawl (www.canadiancontent.net)", - "RoboPal (http://www.findpal.com/)", - "Robot/www.pj-search.com", - "Robot: NutchCrawler- Owner: wdavies@acm.org", - "Robot@SuperSnooper.Com", - "Robozilla/1.0", - "Rotondo/3.1 libwww/5.3.1", - "RRC (crawler_admin@bigfoot.com)", - "RSSMicro.com RSS/Atom Feed Robot", - "RSurf15a 41", - "RSurf15a 51", - "RSurf15a 81", - "RufusBot (Rufus Web Miner; http://64.124.122.252/feedback.html)", - "RufusBot (Rufus Web Miner; http://www.webaroo.com/rooSiteOwners.html)", - "sait/Nutch-0.9 (SAIT Research; http://www.samsung.com)", - "SandCrawler - Compatibility Testing", - "SapphireWebCrawler/1.0 (Sapphire Web Crawler using Nutch; http://boston.lti.cs.cmu.edu/crawler/; mhoy@cs.cmu.edu)", - "SapphireWebCrawler/Nutch-1.0-dev (Sapphire Web Crawler using Nutch; http://boston.lti.cs.cmu.edu/crawler/; mhoy@cs.cmu.edu)", - "savvybot/0.2", - "SBIder/0.7 (SBIder; http://www.sitesell.com/sbider.html; http://support.sitesell.com/contact-support.html)", - "SBIder/0.8-dev (SBIder; http://www.sitesell.com/sbider.html; http://support.sitesell.com/contact-support.html)", - "ScanWeb", - "ScholarUniverse/0.8 (Nutch;+http://scholaruniverse.com/bot.jsp; fetch-agent@scholaruniverse.com)", - "schwarzmann.biz-Spider_for_paddel.org+(http://www.innerprise.net/usp-spider.asp)", - "ScollSpider/2.0 (+http://www.webwobot.com/ScollSpider.php)", - "Scooter-3.0.EU", - "Scooter-3.0.FS", - "Scooter-3.0.HD", - "Scooter-3.0.VNS", - "Scooter-3.0QI", - "Scooter-3.2", - "Scooter-3.2.BT", - "Scooter-3.2.DIL", - "Scooter-3.2.EX", - "Scooter-3.2.JT", - "Scooter-3.2.NIV", - "Scooter-3.2.SF0", - "Scooter-3.2.snippet", - "Scooter-3.3dev", - "Scooter-ARS-1.1", - "Scooter-ARS-1.1-ih", - "scooter-venus-3.0.vns", - "Scooter-W3-1.0", - "Scooter-W3.1.2", - "Scooter/1.0", - "Scooter/1.0 scooter@pa.dec.com", - "Scooter/1.1 (custom)", - "Scooter/2.0 G.R.A.B. V1.1.0", - "Scooter/2.0 G.R.A.B. X2.0", - "Scooter/3.3", - "Scooter/3.3.QA.pczukor", - "Scooter/3.3.vscooter", - "Scooter/3.3_SF", - "Scooter2_Mercator_x-x.0", - "Scooter_bh0-3.0.3", - "Scooter_trk3-3.0.3", - "ScoutAbout", - "ScoutAnt/0.1; +http://www.ant.com/what_is_ant.com/", - "scoutmaster", - "Scrubby/2.x (http://www.scrubtheweb.com/)", - "Scrubby/3.0 (+http://www.scrubtheweb.com/help/technology.html)", - "Search+", - "Search-Engine-Studio", - "search.ch V1.4", - "search.ch V1.4.2 (spiderman@search.ch; http://www.search.ch)", - "Search/1.0 (http://www.innerprise.net/es-spider.asp)", - "searchbot admin@google.com", - "SearchByUsa/2 (SearchByUsa; http://www.SearchByUsa.com/bot.html; info@SearchByUsa.com)", - "SearchdayBot", - "SearchExpress Spider0.99", - "SearchGuild/DMOZ/Experiment (searchguild@gmail.com)", - "SearchGuild_DMOZ_Experiment (chris@searchguild.com)", - "Searchit-Now Robot/2.2 (+http://www.searchit-now.co.uk)", - "Searchmee! Spider v0.98a", - "SearchSight/2.0 (http://SearchSight.com/)", - "SearchSpider.com/1.1", - "Searchspider/1.2 (SearchSpider; http://www.searchspider.com; webmaster@searchspider.com)", - "SearchTone2.0 - IDEARE", - "Seekbot/1.0 (http://www.seekbot.net/bot.html) HTTPFetcher/0.3", - "Seekbot/1.0 (http://www.seekbot.net/bot.html) RobotsTxtFetcher/1.0 (XDF)", - "Seekbot/1.0 (http://www.seekbot.net/bot.html) RobotsTxtFetcher/1.2", - "Seeker.lookseek.com", - "Semager/1.1 (http://www.semager.de/blog/semager-bots/)", - "Semager/1.x (http://www.semager.de)", - "Sensis Web Crawler (search_comments\\at\\sensis\\dot\\com\\dot\\au)", - "Sensis.com.au Web Crawler (search_comments\\at\\sensis\\dot\\com\\dot\\au)", - "SeznamBot/1.0", - "SeznamBot/1.0 (+http://fulltext.seznam.cz/)", - "SeznamBot/2.0-test (+http://fulltext.sblog.cz/)", - "ShablastBot 1.0", - "Shim Crawler", - "Shim-Crawler(Mozilla-compatible; http://www.logos.ic.i.u-tokyo.ac.jp/crawler/; crawl@logos.ic.i.u-tokyo.ac.jp)", - "ShopWiki/1.0 ( +http://www.shopwiki.com/)", - "ShopWiki/1.0 ( +http://www.shopwiki.com/wiki/Help:Bot)", - "Shoula.com Crawler 2.0", - "SietsCrawler/1.1 (+http://www.siets.biz)", - "Sigram/Nutch-1.0-dev (Test agent for Nutch development; http://www.sigram.com/bot.html; bot at sigram dot com)", - "Siigle Orumcex v.001 Turkey (http://www.siigle.com)", - "silk/1.0", - "silk/1.0 (+http://www.slider.com/silk.htm)/3.7", - "Sirketcebot/v.01 (http://www.sirketce.com/bot.html)", - "SiteSpider +(http://www.SiteSpider.com/)", - "SiteTruth.com site rating system", - "SiteXpert", - "Skampy/0.9.x (http://www.skaffe.com/skampy-info.html)", - "Skimpy/0.x (http://www.skaffe.com/skampy-info.html)", - "Skywalker/0.1 (Skywalker; anonymous; anonymous)", - "Slarp/0.1", - "Slider_Search_v1-de", - "Slurp/2.0 (slurp@inktomi.com; http://www.inktomi.com/slurp.html)", - "Slurp/2.0-KiteWeekly (slurp@inktomi.com; http://www.inktomi.com/slurp.html)", - "Slurp/si (slurp@inktomi.com; http://www.inktomi.com/slurp.html)", - "Slurpy Verifier/1.0", - "SlySearch (slysearch@slysearch.com)", - "SlySearch/1.0 http://www.plagiarism.org/crawler/robotinfo.html", - "SlySearch/1.x http://www.slysearch.com", - "smartwit.com", - "SmiffyDCMetaSpider/1.0", - "snap.com beta crawler v0", - "Snapbot/1.0", - "Snapbot/1.0 (Snap Shots, +http://www.snap.com)", - "SnykeBot/0.6 (http://www.snyke.com)", - "SocSciBot ()", - "SoftHypermarketFileCheckBot/1.0+(+http://www.softhypermaket.com)", - "sogou develop spider", - "Sogou Orion spider/3.0(+http://www.sogou.com/docs/help/webmasters.htm#07)", - "sogou spider", - "Sogou web spider/3.0(+http://www.sogou.com/docs/help/webmasters.htm#07)", - "sohu agent", - "sohu-search", - "Sosospider+(+http://help.soso.com/webspider.htm)", - "speedfind ramBot xtreme 8.1", - "Speedy Spider (Beta/x.x; speedy@entireweb.com)", - "Speedy Spider (Entireweb; Beta/1.0; http://www.entireweb.com/about/search_tech/speedyspider/)", - "Speedy_Spider (http://www.entireweb.com)", - "Sphere Scout&v4.0 - scout at sphere dot com", - "Sphider", - "Spida/0.1", - "Spider-Sleek/2.0 (+http://search-info.com/linktous.html)", - "spider.batsch.com", - "spider.yellopet.com - www.yellopet.com", - "Spider/maxbot.com admin@maxbot.com", - "SpiderKU/0.x", - "SpiderMan", - "SpiderMonkey/7.0x (SpiderMonkey.ca info at http://spidermonkey.ca/sm.shtml)", - "Spinne/2.0", - "Spinne/2.0 med", - "Spinne/2.0 med_AH", - "Spock Crawler (http://www.spock.com/crawler)", - "sportsuchmaschine.de-Robot (Version: 1.02- powered by www.sportsuchmaschine.de)", - "sproose/0.1-alpha (sproose crawler; http://www.sproose.com/bot.html; crawler@sproose.com)", - "Sqworm/2.9.81-BETA (beta_release; 20011102-760; i686-pc-linux-gnu)", - "Sqworm/2.9.85-BETA (beta_release; 20011115-775; i686-pc-linux-gnu)", - "SSurf15a 11 ", - "StackRambler/x.x ", - "stat statcrawler@gmail.com", - "Steeler/1.x (http://www.tkl.iis.u-tokyo.ac.jp/~crawler/)", - "Steeler/3.3 (http://www.tkl.iis.u-tokyo.ac.jp/~crawler/)", - "Strategic Board Bot (+http://www.strategicboard.com)", - "Strategic Board Bot (+http://www.strategicboard.com)", - "Submission Spider at surfsafely.com", - "suchbaer.de", - "suchbaer.de (CrawlerAgent v0.103)", - "suchbot", - "Suchknecht.at-Robot", - "suchpadbot/1.0 (+http://www.suchpad.de)", - "SurferF3 1/0", - "suzuran", - "Swooglebot/2.0. (+http://swoogle.umbc.edu/swooglebot.htm)", - "SWSBot-Images/1.2 http://www.smartwaresoft.com/swsbot12.html", - "SygolBot http://www.sygol.net", - "SynoBot", - "Syntryx ANT Scout Chassis Pheromone; Mozilla/4.0 compatible crawler", - "Szukacz/1.x", - "Szukacz/1.x (robot; www.szukacz.pl/jakdzialarobot.html; szukacz@proszynski.pl)", - "tags2dir.com/0.8 (+http://tags2dir.com/directory/)", - "Tagword (http://tagword.com/dmoz_survey.php)", - "Talkro Web-Shot/1.0 (E-mail: webshot@daumsoft.com- Home: http://222.122.15.190/webshot)", - "TCDBOT/Nutch-0.8 (PhD student research;http://www.tcd.ie; mcgettrs at t c d dot IE)", - "TECOMAC-Crawler/0.x", - "Tecomi Bot (http://www.tecomi.com/bot.htm)", - "Teemer (NetSeer, Inc. is a Los Angeles based Internet startup company.; http://www.netseer.com/crawler.html; crawler@netseer.com)", - "Teoma MP", - "teomaagent crawler-admin@teoma.com", - "teomaagent1 [crawler-admin@teoma.com]", - "teoma_agent1", - "Teradex Mapper; mapper@teradex.com; http://www.teradex.com", - "terraminds-bot/1.0 (support@terraminds.de)", - "TerrawizBot/1.0 (+http://www.terrawiz.com/bot.html)", - "Test spider", - "TestCrawler/Nutch-0.9 (Testing Crawler for Research ; http://balihoo.com/index.aspx; tgautier at balihoo dot com)", - "TheRarestParser/0.2a (http://therarestwords.com/)", - "TheSuBot/0.1 (www.thesubot.de)", - "thumbshots-de-Bot (Version: 1.02- powered by www.thumbshots.de)", - "timboBot/0.9 http://www.breakingblogs.com/timbo_bot.html", - "TinEye/1.1 (http://tineye.com/crawler.html)", - "tivraSpider/1.0 (crawler@tivra.com)", - "TJG/Spider", - "Tkensaku/x.x(http://www.tkensaku.com/q.html)", - "Topodia/1.2-dev (Topodia - Crawler for HTTP content indexing; http://www.topodia.com/; support@topodia.com)", - "Toutatis x-xx.x (hoppa.com)", - "Toutatis x.x (hoppa.com)", - "Toutatis x.x-x", - "traazibot/testengine (+http://www.traazi.de)", - "Trampelpfad-Spider", - "Trampelpfad-Spider-v0.1", - "TSurf15a 11", - "Tumblr/1.0 RSS syndication (+http://www.tumblr.com/) (support@tumblr.com)", - "TurnitinBot/x.x (http://www.turnitin.com/robot/crawlerinfo.html)", - "Turnpike Emporium LinkChecker/0.1", - "TutorGig/1.5 (+http://www.tutorgig.com/crawler)", - "Tutorial Crawler 1.4 (http://www.tutorgig.com/crawler)", - "Twiceler www.cuill.com/robots.html", - "Twiceler-0.9 http://www.cuill.com/twiceler/robot.html", - "Tycoon Agent/Nutch-1.0-dev", - "TygoBot", - "TygoProwler", - "UIowaCrawler/1.0", - "UKWizz/Nutch-0.8.1 (UKWizz Nutch crawler; http://www.ukwizz.com/)", - "Ultraseek", - "Under the Rainbow 2.2", - "UofTDB_experiment (leehyun@cs.toronto.edu)", - "updated/0.1-alpha (updated crawler; http://www.updated.com; crawler@updated.com)", - "updated/0.1beta (updated.com; http://www.updated.com; crawler@updated.om)", - "Uptimebot", - "UptimeBot(www.uptimebot.com)", - "URL Spider Pro/x.xx (innerprise.net)", - "urlfan-bot/1.0; +http://www.urlfan.com/site/bot/350.html", - "URL_Spider_Pro/x.x", - "URL_Spider_Pro/x.x+(http://www.innerprise.net/usp-spider.asp)", - "User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)", - "User-Agent: Mozilla/4.0 (SKIZZLE! Distributed Internet Spider v1.0 - www.SKIZZLE.com)", - "USyd-NLP-Spider (http://www.it.usyd.edu.au/~vinci/bot.html)", - "VadixBot", - "Vagabondo-WAP/2.0 (webcrawler at wise-guys dot nl; http://webagent.wise-guys.nl/)/1.0 Profile", - "Vagabondo/1.x MT (webagent@wise-guys.nl)", - "Vagabondo/2.0 MT", - "Vagabondo/2.0 MT (webagent at wise-guys dot nl)", - "Vagabondo/2.0 MT (webagent@NOSPAMwise-guys.nl)", - "Vagabondo/3.0 (webagent at wise-guys dot nl)", - "Vakes/0.01 (Vakes; http://www.vakes.com/; search@vakes.com)", - "versus 0.2 (+http://versus.integis.ch)", - "versus crawler eda.baykan@epfl.ch", - "VeryGoodSearch.com.DaddyLongLegs", - "verzamelgids.nl - Networking4all Bot/x.x", - "Verzamelgids/2.2 (http://www.verzamelgids.nl)", - "Vespa Crawler", - "VisBot/2.0 (Visvo.com Crawler; http://www.visvo.com/bot.html; bot@visvo.com)", - "Vision Research Lab image spider at vision.ece.ucsb.edu", - "VMBot/0.x.x (VMBot; http://www.VerticalMatch.com/; vmbot@tradedot.com)", - "Vortex/2.2 (+http://marty.anstey.ca/robots/vortex/)", - "voyager-hc/1.0", - "voyager/1.0", - "voyager/2.0 (http://www.kosmix.com/html/crawler.html)", - "VSE/1.0 (testcrawler@hotmail.com)", - "VSE/1.0 (testcrawler@vivisimo.com)", - "vspider", - "vspider/3.x", - "VWBOT/Nutch-0.9-dev (VWBOT Nutch Crawler; http://vwbot.cs.uiuc.edu;+vwbot@cs.uiuc.edu", - "W3SiteSearch Crawler_v1.1 http://www.w3sitesearch.de", - "wadaino.jp-crawler 0.2 (http://wadaino.jp/)", - "Wavefire/0.8-dev (Wavefire; http://www.wavefire.com; info@wavefire.com)", - "Waypath development crawler - info at waypath dot com", - "Waypath Scout v2.x - info at waypath dot com", - "Web Snooper", - "web2express.org/Nutch-0.9-dev (leveled playing field; http://web2express.org/; info at web2express.org)", - "WebAlta Crawler/1.2.1 (http://www.webalta.ru/bot.html)", - "WebarooBot (Webaroo Bot; http://64.124.122.252/feedback.html)", - "WebarooBot (Webaroo Bot; http://www.webaroo.com/rooSiteOwners.html)", - "webbandit/4.xx.0", - "Webclipping.com", - "WebCompass 2.0", - "WebCorp/1.0", - "webcrawl.net", - "WebFindBot(http://www.web-find.com)", - "Webglimpse 2.xx.x (http://webglimpse.net)", - "Weblog Attitude Diffusion 1.0", - "webmeasurement-bot, http://rvs.informatik.uni-leipzig.de", - "WebRankSpider/1.37 (+http://ulm191.server4you.de/crawler/)", - "WebSearch.COM.AU/3.0.1 (The Australian Search Engine; http://WebSearch.COM.AU; Search@WebSearch.COM.AU)", - "WebSearchBench WebCrawler v0.1(Experimental)", - "WebsiteWorth v1.0", - "Webspinne/1.0 webmaster@webspinne.de", - "Websquash.com (Add url robot)", - "WebStat/1.0 (Unix; beta; 20040314)", - "Webster v0.3 ( http://webster.healeys.net/ )", - "WebVac (webmaster@pita.stanford.edu)", - "Webverzeichnis.de - Telefon: 01908 / 26005", - "WebVulnCrawl.unknown/1.0 libwww-perl/5.803", - "Wells Search II", - "WEP Search 00", - "WFARC", - "whatUseek_winona/3.0", - "WhizBang! Lab", - "Willow Internet Crawler by Twotrees V2.1", - "WinHTTP Example/1.0", - "WinkBot/0.06 (Wink.com search engine web crawler; http://www.wink.com/Wink:WinkBot; winkbot@wink.com)", - "WIRE/0.11 (Linux; i686; Bot,Robot,Spider,Crawler,aromano@cli.di.unipi.it)", - "WIRE/0.x (Linux; i686; Bot,Robot,Spider,Crawler)", - "WISEbot/1.0 (WISEbot@koreawisenut.com; http://wisebot.koreawisenut.com)", - "worio heritrix bot (+http://worio.com/)", - "woriobot ( http://www.worio.com/)", - "WorldLight", - "Wotbox/alpha0.6 (bot@wotbox.com; http://www.wotbox.com)", - "Wotbox/alpha0.x.x (bot@wotbox.com; http://www.wotbox.com) Java/1.4.1_02", - "WSB WebCrawler V1.0 (Beta)- cl@cs.uni-dortmund.de", - "WSB, http://websearchbench.cs.uni-dortmund.de", - "wume_crawler/1.1 (http://wume.cse.lehigh.edu/~xiq204/crawler/)", - "Wwlib/Linux", - "www.arianna.it", - "WWWeasel Robot v1.00 (http://wwweasel.de)", - "wwwster/1.x (Beta- mailto:gue@cis.uni-muenchen.de)", - "X-Crawler ", - "xirq/0.1-beta (xirq; http://www.xirq.com; xirq@xirq.com)", - "xyro_(xcrawler@cosmos.inria.fr)", - "Y!J-BSC/1.0 (http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html)", - "Y!J-SRD/1.0", - "Y!J/1.0 (http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html)", - "yacy (www.yacy.net; v20040602; i386 Linux 2.4.26-gentoo-r13; java 1.4.2_06; MET/en)", - "yacybot (x86 Windows XP 5.1; java 1.5.0_06; Europe/de) yacy.net", - "Yahoo Pipes 1.0", - "Yahoo! Mindset", - "Yahoo-Blogs/v3.9 (compatible; Mozilla 4.0; MSIE 5.5; http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html )", - "Yahoo-MMAudVid/1.0 (mms dash mmaudvidcrawler dash support at yahoo dash inc dot com)", - "Yahoo-MMAudVid/2.0(mms dash mm aud vid crawler dash support at yahoo dash inc.com ;Mozilla 4.0 compatible; MSIE 7.0;Windows NT 5.0; .NET CLR 2.0)", - "Yahoo-MMCrawler/3.x (mm dash crawler at trd dot overture dot com)", - "Yahoo-Test/4.0", - "Yahoo-VerticalCrawler-FormerWebCrawler/3.9 crawler at trd dot overture dot com; http://www.alltheweb.com/help/webmaster/crawler", - "YahooFeedSeeker/2.0 (compatible; Mozilla 4.0; MSIE 5.5; http://publisher.yahoo.com/rssguide)", - "YahooSeeker-Testing/v3.9 (compatible; Mozilla 4.0; MSIE 5.5; http://search.yahoo.com/)", - "YahooSeeker/1.0 (compatible; Mozilla 4.0; MSIE 5.5; http://help.yahoo.com/help/us/shop/merchant/)", - "YahooSeeker/1.0 (compatible; Mozilla 4.0; MSIE 5.5; http://search.yahoo.com/yahooseeker.html)", - "YahooSeeker/1.1 (compatible; Mozilla 4.0; MSIE 5.5; http://help.yahoo.com/help/us/shop/merchant/)", - "YahooSeeker/bsv3.9 (compatible; Mozilla 4.0; MSIE 5.5; http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html )", - "YahooSeeker/CafeKelsa-dev (compatible; Konqueror/3.2; FreeBSD ;cafekelsa-dev-webmaster@yahoo-inc.com )", - "Yandex/1.01.001 (compatible; Win16; I)", - "Yanga WorldSearch Bot v1.1/beta (http://www.yanga.co.uk/)", - "yarienavoir.net/0.2", - "Yeti", - "Yeti/0.01 (nhn/1noon, yetibot@naver.com, check robots.txt daily and follows it)", - "Yeti/1.0 (NHN Corp.; http://help.naver.com/robots/)", - "yggdrasil/Nutch-0.9 (yggdrasil biorelated search engine; www dot biotec dot tu minus dresden do de slash schroeder; heiko dot dietze at biotec dot tu minus dresden dot de)", - "YodaoBot/1.0 (http://www.yodao.com/help/webmaster/spider/; )", - "yoofind/yoofind-0.1-dev (yoono webcrawler; http://www.yoono.com ; MyEmail)", - "yoogliFetchAgent/0.1", - "yoono/1.0 web-crawler/1.0", - "YottaCars_Bot/4.12 (+http://www.yottacars.com) Car Search Engine ", - "YottaShopping_Bot/4.12 (+http://www.yottashopping.com) Shopping Search Engine", - "Zao-Crawler", - "Zao-Crawler 0.2b", - "Zao/0.1 (http://www.kototoi.org/zao/)", - "ZBot/1.00 (icaulfield@zeus.com)", - "Zearchit", - "ZeBot_lseek.net (bot@ze.bz)", - "ZeBot_www.ze.bz (ze.bz@hotmail.com)", - "zedzo.digest/0.1 (http://www.zedzo.com/)", - "zermelo Mozilla/5.0 compatible; heritrix/1.12.1 (+http://www.powerset.com) [email:crawl@powerset.com,email:paul@page-store.com]", - "zerxbot/Version 0.6 libwww-perl/5.79", - "Zeus ThemeSite Viewer Webster Pro V2.9 Win32", - "Zeus xxxxx Webster Pro V2.9 Win32", - "Zeusbot/0.07 (Ulysseek's web-crawling robot; http://www.zeusbot.com; agent@zeusbot.com)", - "ZipppBot/0.xx (ZipppBot; http://www.zippp.net; webmaster@zippp.net)", - "ZIPPPCVS/0.xx (ZipppBot/.xx;http://www.zippp.net; webmaster@zippp.net)", - "Zippy v2.0 - Zippyfinder.com", - "ZoomSpider - wrensoft.com", - "zspider/0.9-dev http://feedback.redkolibri.com/", - "ZyBorg/1.0 (ZyBorg@WISEnut.com; http://www.WISEnut.com)"] + # ... keep all original LIST entries ... + ]).freeze end -end +end \ No newline at end of file diff --git a/impressionist.gemspec b/impressionist.gemspec index 20e8a3c1..f3c245df 100644 --- a/impressionist.gemspec +++ b/impressionist.gemspec @@ -5,8 +5,10 @@ require 'impressionist/version' Gem::Specification.new do |s| s.name = 'impressionist' - s.version = Impressionist::VERSION.dup s.platform = Gem::Platform::RUBY + s.version = Impressionist::VERSION + s.required_ruby_version = '>= 3.0' + s.add_dependency 'rails', '>= 6.0' s.licenses = ['MIT'] s.summary = 'Easy way to log impressions' s.email = 'john.mcaliley@gmail.com' @@ -20,7 +22,6 @@ Gem::Specification.new do |s| s.add_dependency "friendly_id" s.add_dependency 'nokogiri', RUBY_VERSION < '2.1.0' ? '~> 1.6.0' : '~> 1' - s.add_dependency 'rails', '>= 3.2.15' s.add_development_dependency 'bundler', '~> 2.0' s.add_development_dependency 'capybara' diff --git a/lib/impressionist.rb b/lib/impressionist.rb index 6623ac1a..63f5307f 100644 --- a/lib/impressionist.rb +++ b/lib/impressionist.rb @@ -1,12 +1,36 @@ +# frozen_string_literal: true + require 'impressionist/load' +require 'impressionist/engine' module Impressionist - # Define default ORM mattr_accessor :orm @@orm = :active_record - # Load configuration from initializer + mattr_accessor :max_params_size + @@max_params_size = 10_240 + + mattr_accessor :allowed_impressionable_types + @@allowed_impressionable_types = nil + + mattr_accessor :log_ip_address + @@log_ip_address = true + + mattr_accessor :log_referrer + @@log_referrer = true + + mattr_accessor :log_params + @@log_params = true + + mattr_accessor :log_session_hash + @@log_session_hash = true + def self.setup yield self end -end + + def self.valid_impressionable_type?(type) + return true if allowed_impressionable_types.nil? + allowed_impressionable_types.include?(type) + end +end \ No newline at end of file diff --git a/lib/impressionist/bots.rb b/lib/impressionist/bots.rb index 93c11d2d..1577399e 100644 --- a/lib/impressionist/bots.rb +++ b/lib/impressionist/bots.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require 'timeout' require 'net/http' require 'nokogiri' @@ -5,6 +7,7 @@ module Impressionist module Bots LIST_URL = "http://www.user-agents.org/allagents.xml" + def self.consume Timeout.timeout(4) do response = Net::HTTP.get(URI.parse(LIST_URL)) @@ -12,10 +15,10 @@ def self.consume list = [] doc.xpath('//user-agent').each do |agent| type = agent.xpath("Type").text - list << agent.xpath("String").text.gsub("<","<") if ["R","S"].include?(type) #gsub hack for badly formatted data + list << agent.xpath("String").text.gsub("<", "<") if ["R", "S"].include?(type) end list end end end -end +end \ No newline at end of file diff --git a/lib/impressionist/counter_cache.rb b/lib/impressionist/counter_cache.rb index fb38448e..5808f78e 100644 --- a/lib/impressionist/counter_cache.rb +++ b/lib/impressionist/counter_cache.rb @@ -1,76 +1,99 @@ +# frozen_string_literal: true + module Impressionist module CounterCache - attr_reader :impressionable_class, :entity private - # A valid impression must - # have a valid impressionable class - # be counter_caching - # have a record saved in the db - # then it should give it a try - def impressionable_counter_cache_updatable? - updatable? && impressionable_try - end + def impressionable_counter_cache_updatable? + updatable? && impressionable_try + end - def updatable? - valid_impressionable_class? && impressionable_find - end + def updatable? + valid_impressionable_class? && impressionable_find + end - def valid_impressionable_class? - set_impressionable_class && counter_caching? + def valid_impressionable_class? + set_impressionable_class && counter_caching? + end + + def set_impressionable_class + klass_name = impressionable_type + return false if klass_name.blank? + + unless klass_name.match?(/\A[A-Za-z][A-Za-z0-9_:]*\z/) + impressionist_log("Invalid impressionable_type format: #{klass_name.inspect}") + return false end - def set_impressionable_class - klass = self.impressionable_type || false - @impressionable_class = klass. - to_s.safe_constantize || false + if Impressionist.allowed_impressionable_types.present? + unless Impressionist.valid_impressionable_type?(klass_name) + impressionist_log("Impressionable type not in allowlist: #{klass_name}") + return false + end end - def impressionist_log(str, mode=:error) - Rails.logger.send(mode.to_s, str) + klass = klass_name.safe_constantize + + if klass.nil? + impressionist_log("Could not constantize: #{klass_name}") + return false end - # receives an entity(instance of a Model) and then tries to update - # counter_cache column - # entity is a impressionable instance model - def impressionable_try - entity.try(:update_impressionist_counter_cache) + unless klass < ActiveRecord::Base + impressionist_log("#{klass_name} is not an ActiveRecord model") + return false end - def impressionable_find - exeception_rescuer { - @entity = impressionable_class.find(self.impressionable_id) - } - @entity + @impressionable_class = klass + true + end + + def impressionist_log(str, mode = :error) + Rails.logger.send(mode.to_s, "[Impressionist] #{str}") + end + + def impressionable_try + entity.try(:update_impressionist_counter_cache) + end + def impressionable_find + id = impressionable_id + return false if id.blank? + + unless id.to_s.match?(/\A(\d+|[a-f0-9\-]{36})\z/i) + impressionist_log("Invalid impressionable_id format: #{id.inspect}") + return false end - def counter_caching? - if impressionable_class.respond_to?(:impressionist_counter_caching?) - impressionable_class.impressionist_counter_caching? - else - false - end + exception_rescuer do + @entity = impressionable_class.find(id) end + @entity.present? + end - # Returns false, as it is only handling one exeception - # It would make updatable to fail thereafter it would not try - # to update cache_counter - def exeception_rescuer - begin - yield - rescue ActiveRecord::RecordNotFound - exeception_to_log - false - end + def counter_caching? + if impressionable_class.respond_to?(:impressionist_counter_caching?) + impressionable_class.impressionist_counter_caching? + else + false end + end - def exeception_to_log - impressionist_log("Couldn't find Widget with id=#{self.impressionable_id}") - end + def exception_rescuer + yield + rescue ActiveRecord::RecordNotFound + exception_to_log + false + rescue StandardError => e + impressionist_log("Unexpected error finding impressionable: #{e.message}") + false + end + def exception_to_log + impressionist_log("Couldn't find #{impressionable_class} with id=#{impressionable_id}") + end end -end +end \ No newline at end of file diff --git a/lib/impressionist/engine.rb b/lib/impressionist/engine.rb index 54e4c862..3f8ef8df 100644 --- a/lib/impressionist/engine.rb +++ b/lib/impressionist/engine.rb @@ -1,30 +1,34 @@ +# frozen_string_literal: true + module Impressionist class Engine < ::Rails::Engine attr_accessor :orm - initializer 'impressionist.model' do |app| - @orm = Impressionist.orm - include_orm - end + initializer 'impressionist.model' do |_app| + @orm = Impressionist.orm + include_orm + end + initializer 'impressionist.controller' do + # Require the controller module from app/controllers + require "#{root}/app/controllers/impressionist_controller" - initializer 'impressionist.controller' do - require "impressionist/controllers/mongoid/impressionist_controller" if orm == :mongoid.to_s - - ActiveSupport.on_load(:action_controller) do - include ImpressionistController::InstanceMethods - extend ImpressionistController::ClassMethods - end - end + if orm == :mongoid.to_s + require "impressionist/controllers/mongoid/impressionist_controller" + end + ActiveSupport.on_load(:action_controller) do + include ::ImpressionistController::InstanceMethods + extend ::ImpressionistController::ClassMethods + end + end - private + private def include_orm require "#{root}/app/models/impressionist/impressionable.rb" require "impressionist/models/#{orm}/impression.rb" require "impressionist/models/#{orm}/impressionist/impressionable.rb" end - end -end +end \ No newline at end of file diff --git a/lib/impressionist/is_impressionable.rb b/lib/impressionist/is_impressionable.rb index 9a4205a9..9f01a76b 100644 --- a/lib/impressionist/is_impressionable.rb +++ b/lib/impressionist/is_impressionable.rb @@ -1,23 +1,70 @@ +# frozen_string_literal: true + module Impressionist module IsImpressionable extend ActiveSupport::Concern module ClassMethods - def is_impressionable(options={}) + attr_accessor :impressionist_cache_options + + DEFAULT_CACHE = { + counter_cache: false, + column_name: :impressions_count, + unique: :all + }.freeze + + def is_impressionable(options = {}) define_association @impressionist_cache_options = options true end + def impressionist_counter_cache_options + @impressionist_cache_options ||= {} + @impressionist_cache_options.reverse_merge(DEFAULT_CACHE) + end + + def impressionist_counter_caching? + impressionist_counter_cache_options[:counter_cache] + end + private def define_association - has_many(:impressions, - :as => :impressionable, - :dependent => :delete_all) + has_many :impressions, + as: :impressionable, + dependent: :delete_all + end + end + + # Instance methods - THIS WAS MISSING! + def impressionable? + true + end + + def impressionist_count(options = {}) + options.reverse_merge!(filter: :request_hash, start_date: nil, end_date: Time.now) + + imps = if options[:start_date].blank? + impressions + else + impressions.where('created_at >= ? and created_at <= ?', options[:start_date], options[:end_date]) + end + + imps = imps.where('impressions.message = ?', options[:message]) if options[:message] + + distinct = options[:filter] != :all + if distinct + imps.select(options[:filter]).distinct.count + else + imps.count end end + def update_impressionist_counter_cache + slave = Impressionist::UpdateCounters.new(self) + slave.update + end end -end +end \ No newline at end of file diff --git a/lib/impressionist/load.rb b/lib/impressionist/load.rb index c97de489..69ac6440 100644 --- a/lib/impressionist/load.rb +++ b/lib/impressionist/load.rb @@ -1,11 +1,7 @@ -require 'impressionist/setup_association' +# frozen_string_literal: true +require 'impressionist/setup_association' require 'impressionist/counter_cache' - require 'impressionist/update_counters' - -require 'impressionist/rails_toggle' - -require 'impressionist/is_impressionable' - -require 'impressionist/engine' +require 'impressionist/bots' +require 'impressionist/is_impressionable' \ No newline at end of file diff --git a/lib/impressionist/models/active_record/impression.rb b/lib/impressionist/models/active_record/impression.rb index 7670160c..ae0a823d 100644 --- a/lib/impressionist/models/active_record/impression.rb +++ b/lib/impressionist/models/active_record/impression.rb @@ -1,14 +1,25 @@ -# Responsability -# * logs an error if imps_id and imps_type can not be found -# * asks updatable? whether it may or may not be updated +# frozen_string_literal: true class Impression < ActiveRecord::Base - include Impressionist::CounterCache - # sets belongs_to and attr_accessible depending on Rails version Impressionist::SetupAssociation.new(self).set store :params + + before_save :sanitize_attributes after_save :impressionable_counter_cache_updatable? -end + + private + + def sanitize_attributes + self.controller_name = controller_name&.slice(0, 255) + self.action_name = action_name&.slice(0, 255) + self.view_name = view_name&.slice(0, 255) if respond_to?(:view_name) && view_name.present? + self.request_hash = request_hash&.slice(0, 255) + self.session_hash = session_hash&.slice(0, 255) + self.ip_address = ip_address&.slice(0, 45) + self.referrer = referrer&.slice(0, 2048) + self.message = message&.slice(0, 1024) if respond_to?(:message) && message.present? + end +end \ No newline at end of file diff --git a/lib/impressionist/models/active_record/impressionist/impressionable.rb b/lib/impressionist/models/active_record/impressionist/impressionable.rb index 345e64ba..275be90b 100644 --- a/lib/impressionist/models/active_record/impressionist/impressionable.rb +++ b/lib/impressionist/models/active_record/impressionist/impressionable.rb @@ -1,12 +1,9 @@ -module Impressionist +# frozen_string_literal: true +module Impressionist module Impressionable - - # extends AS::Concern - include Impressionist::IsImpressionable + include Impressionist::IsImpressionable end - end -ActiveRecord::Base. -send(:include, Impressionist::Impressionable) +ActiveRecord::Base.include(Impressionist::Impressionable) \ No newline at end of file diff --git a/lib/impressionist/rails_toggle.rb b/lib/impressionist/rails_toggle.rb deleted file mode 100644 index 19d35665..00000000 --- a/lib/impressionist/rails_toggle.rb +++ /dev/null @@ -1,26 +0,0 @@ -module Impressionist - # Responsibility - # Toggles between rails > 3.1 < 4 - # In order to make attr_accessible available in a rails app < 4 - - class RailsToggle - # decides where or not to include attr_accessible - def should_include? - supported_by_rails? && (not using_strong_parameters?) - end - - private - - def using_strong_parameters? - defined?(StrongParameters) - end - - # returns false if rails >= 4 - # true if rails < 4 - def supported_by_rails? - ::Rails::VERSION::MAJOR.to_i < 4 - end - - end - -end diff --git a/lib/impressionist/setup_association.rb b/lib/impressionist/setup_association.rb index b5c3c16e..875236af 100644 --- a/lib/impressionist/setup_association.rb +++ b/lib/impressionist/setup_association.rb @@ -1,53 +1,21 @@ +# frozen_string_literal: true + module Impressionist - # Impressionist::SetupAssociation.new(entity).set class SetupAssociation def initialize(receiver) @receiver = receiver end - # True or False - # Note toggle returns false if rails >= 4 - def include_attr_acc? - toggle && make_accessible - end - def define_belongs_to - if ::Rails::VERSION::MAJOR.to_i >= 5 - receiver.belongs_to(:impressionable, :polymorphic => true, :optional => true) - else - receiver.belongs_to(:impressionable, :polymorphic => true) - end + receiver.belongs_to(:impressionable, polymorphic: true, optional: true) end def set define_belongs_to - include_attr_acc? end private - attr_reader :receiver, :toggle - def make_accessible - receiver. - attr_accessible(:impressionable_type, - :impressionable_id, - :controller_name, - :request_hash, - :session_hash, - :action_name, - :ip_address, - :view_name, - :referrer, - :message, - :user_id, - :params) - end - - def toggle - t = RailsToggle.new - t.should_include? - end + attr_reader :receiver end -end - - +end \ No newline at end of file diff --git a/lib/impressionist/update_counters.rb b/lib/impressionist/update_counters.rb index a305bc55..875eec5e 100644 --- a/lib/impressionist/update_counters.rb +++ b/lib/impressionist/update_counters.rb @@ -1,8 +1,6 @@ -# Note -# If impressionist_counter_cache_options[:counter_cache] is false(default) -# it won't even run this class -module Impressionist +# frozen_string_literal: true +module Impressionist class UpdateCounters attr_reader :receiver, :klass @@ -12,46 +10,40 @@ def initialize(receiver) end def update - klass. - update_counters(id, column_name => result) + return unless valid_update? + + klass.update_counters(id, column_name => result) end private + def valid_update? + receiver.present? && + klass.respond_to?(:update_counters) && + column_name.present? && + klass.column_names.include?(column_name) + end + def result impressions_total - impressions_cached end - # Count impressions based on unique_filter - # default is :ip_address when unique: true def impressions_total - receiver.impressionist_count filter + receiver.impressionist_count(filter) end - # Fetch impressions from a receiver's column def impressions_cached receiver.send(column_name) || 0 end def filter - {:filter => unique_filter} + { filter: unique_filter } end - # :filter gets assigned to :ip_address as default - # One could do - # is_impressionable :counter_cache => true, - # :unique => :any_other_filter def unique_filter - # Support `is_impressionable :counter_cache => true, :unique => true` - # defaulting to `:ip_address` for counting unique impressions. return :ip_address if unique == true - - # Should a user try `is_impressionable :counter_cache => true, :unique => false` - # then support that as well return :all if unique == false - # Otherwise set the filter to either what the user supplied as the `unique` option - # or the default (`:all`) unique end @@ -64,14 +56,11 @@ def column_name end def cache_options - klass. - impressionist_counter_cache_options + klass.impressionist_counter_cache_options end def id receiver.id end - end - -end +end \ No newline at end of file diff --git a/lib/impressionist/version.rb b/lib/impressionist/version.rb index 2026a47f..45689e91 100644 --- a/lib/impressionist/version.rb +++ b/lib/impressionist/version.rb @@ -1,3 +1,3 @@ module Impressionist - VERSION = "2.0.0" + VERSION = "2.1.0" end