'Walhello appie', 'architext' => 'ArchitextSpider', 'jeeves' => 'Ask', 'bjaaland' => 'Bjaaland', 'ferret' => 'Wild Ferret Web Hopper #1, #2, #3', 'googlebot' => 'Googlebot', 'gulliver' => 'Northern Light Gulliver', 'virus\_detector' => 'virus_detector', 'harvest' => 'Harvest', 'htdig' => 'ht://Dig', 'linkwalker' => 'LinkWalker', 'lycos_' => 'Lycos', 'moget' => 'moget', 'muscatferret' => 'Muscat Ferret', 'myweb' => 'Internet Shinchakubin', 'nomad' => 'Nomad', 'scooter' => 'Scooter', 'yahoo!\sslurp\schina' => 'Yahoo! Slurp China', 'slurp' => 'Yahoo Slurp', '^voyager\/' => 'Voyager', 'weblayers' => 'Weblayers', # Common robots (Not in robot file) 'antibot' => 'Antibot', 'bruinbot' => 'The web archive', 'digout4u' => 'Digout4u', 'echo!' => 'EchO!', 'fast\-webcrawler' => 'Fast-Webcrawler', 'ia_archiver\-web\.archive\.org' => 'The web archive (IA Archiver)', 'ia_archiver' => 'Alexa (IA Archiver)', 'jennybot' => 'JennyBot', 'mercator' => 'Mercator', 'msnbot\-media' => 'MSNBot-media', 'msnbot' => 'MSNBot', 'netcraft' => 'Netcraft', 'petersnews' => 'Petersnews', 'unlost_web_crawler' => 'Unlost Web Crawler', 'voila' => 'Voila', 'webbase' => 'WebBase', 'zyborg' => 'ZyBorg', 'wisenutbot' => 'WISENutbot', 'webcollage' => 'WebCollage', 'cfetch' => 'Cfetch', # Less common robots (In robot file) '[^a]fish' => 'Fish search', 'abcdatos' => 'ABCdatos BotLink', 'acme\.spider' => 'Acme.Spider', 'ahoythehomepagefinder' => 'Ahoy! 
The Homepage Finder', 'alkaline' => 'Alkaline', 'anthill' => 'Anthill', 'arachnophilia' => 'Arachnophilia', 'arale' => 'Arale', 'araneo' => 'Araneo', 'aretha' => 'Aretha', 'ariadne' => 'ARIADNE', 'powermarks' => 'Powermarks', # must come before Arks; seen used by referrer spam 'arks' => 'arks', 'aspider' => 'ASpider (Associative Spider)', 'atn\.txt' => 'ATN Worldwide', 'atomz' => 'Atomz.com Search Robot', 'auresys' => 'AURESYS', 'backrub' => 'BackRub', 'bbot' => 'BBot', 'bigbrother' => 'Big Brother', 'blackwidow' => 'BlackWidow', 'blindekuh' => 'Die Blinde Kuh', 'bloodhound' => 'Bloodhound', 'borg\-bot' => 'Borg-Bot', 'brightnet' => 'bright.net caching robot', 'bspider' => 'BSpider', 'cactvschemistryspider' => 'CACTVS Chemistry Spider', 'calif[^r]' => 'Calif', 'cassandra' => 'Cassandra', 'cgireader' => 'Digimarc Marcspider/CGI', 'checkbot' => 'Checkbot', 'christcrawler' => 'ChristCrawler.com', 'churl' => 'churl', 'cienciaficcion' => 'cIeNcIaFiCcIoN.nEt', 'collective' => 'Collective', 'combine' => 'Combine System', 'conceptbot' => 'Conceptbot', 'coolbot' => 'CoolBot', 'core' => 'Web Core / Roots', 'cosmos' => 'XYLEME Robot', 'cruiser' => 'Internet Cruiser Robot', 'cusco' => 'Cusco', 'cyberspyder' => 'CyberSpyder Link Test', 'desertrealm' => 'Desert Realm Spider', 'deweb' => 'DeWeb(c) Katalog/Index', 'dienstspider' => 'DienstSpider', 'digger' => 'Digger', 'diibot' => 'Digital Integrity Robot', 'direct_hit' => 'Direct Hit Grabber', 'dnabot' => 'DNAbot', 'download_express' => 'DownLoad Express', 'dragonbot' => 'DragonBot', 'dwcp' => 'DWCP (Dridus\' Web Cataloging Project)', 'e\-collector' => 'e-collector', 'ebiness' => 'EbiNess', 'elfinbot' => 'ELFINBOT', 'emacs' => 'Emacs-w3 Search Engine', 'emcspider' => 'ananzi', 'esther' => 'Esther', 'evliyacelebi' => 'Evliya Celebi', 'fastcrawler' => 'FastCrawler', 'fdse' => 'Fluid Dynamics Search Engine robot', 'felix' => 'Felix IDE', 'fetchrover' => 'FetchRover', 'fido' => 'fido', 'finnish' => 'H���ki', 'fireball' => 
'KIT-Fireball', 'fouineur' => 'Fouineur', 'francoroute' => 'Robot Francoroute', 'freecrawl' => 'Freecrawl', 'funnelweb' => 'FunnelWeb', 'gama' => 'gammaSpider, FocusedCrawler', 'gazz' => 'gazz', 'gcreep' => 'GCreep', 'getbot' => 'GetBot', 'geturl' => 'GetURL', 'golem' => 'Golem', 'grapnel' => 'Grapnel/0.01 Experiment', 'griffon' => 'Griffon', 'gromit' => 'Gromit', 'gulperbot' => 'Gulper Bot', 'hambot' => 'HamBot', 'havindex' => 'havIndex', 'hometown' => 'Hometown Spider Pro', 'htmlgobble' => 'HTMLgobble', 'hyperdecontextualizer' => 'Hyper-Decontextualizer', 'iajabot' => 'iajaBot', 'iconoclast' => 'Popular Iconoclast', 'ilse' => 'Ingrid', 'imagelock' => 'Imagelock', 'incywincy' => 'IncyWincy', 'informant' => 'Informant', 'infoseek' => 'InfoSeek Robot 1.0', 'infoseeksidewinder' => 'Infoseek Sidewinder', 'infospider' => 'InfoSpiders', 'inspectorwww' => 'Inspector Web', 'intelliagent' => 'IntelliAgent', 'irobot' => 'I, Robot', 'iron33' => 'Iron33', 'israelisearch' => 'Israeli-search', 'javabee' => 'JavaBee', 'jbot' => 'JBot Java Web Robot', 'jcrawler' => 'JCrawler', 'jobo' => 'JoBo Java Web Robot', 'jobot' => 'Jobot', 'joebot' => 'JoeBot', 'jubii' => 'The Jubii Indexing Robot', 'jumpstation' => 'JumpStation', 'kapsi' => 'image.kapsi.net', 'katipo' => 'Katipo', 'kilroy' => 'Kilroy', 'ko_yappo_robot' => 'KO_Yappo_Robot', 'kummhttp' => 'KummHttp', 'labelgrabber\.txt' => 'LabelGrabber', 'larbin' => 'larbin', 'legs' => 'legs', 'linkidator' => 'Link Validator', 'linkscan' => 'LinkScan', 'lockon' => 'Lockon', 'logo_gif' => 'logo.gif Crawler', 'macworm' => 'Mac WWWWorm', 'lmspider' => 'lmspider', 'lwp\-request' => 'lwp-request', 'lwp\-trivial' => 'lwp-trivial', 'magpie' => 'MagpieRSS', 'marvin' => 'marvin/infoseek', 'mattie' => 'Mattie', 'mediafox' => 'MediaFox', 'merzscope' => 'MerzScope', 'meshexplorer' => 'NEC-MeshExplorer', 'mindcrawler' => 'MindCrawler', 'mnogosearch' => 'mnoGoSearch search engine software', 'momspider' => 'MOMspider', 'monster' => 'Monster', 'motor' => 
'Motor', 'muncher' => 'Muncher', 'mwdsearch' => 'Mwd.Search', 'ndspider' => 'NDSpider', 'nederland\.zoek' => 'Nederland.zoek', 'netcarta' => 'NetCarta WebMap Engine', 'netmechanic' => 'NetMechanic', 'netscoop' => 'NetScoop', 'newscan\-online' => 'newscan-online', 'nhse' => 'NHSE Web Forager', 'northstar' => 'The NorthStar Robot', 'nzexplorer' => 'nzexplorer', 'objectssearch' => 'ObjectsSearch', 'occam' => 'Occam', 'octopus' => 'HKU WWW Octopus', 'openfind' => 'Openfind data gatherer', 'orb_search' => 'Orb Search', 'packrat' => 'Pack Rat', 'pageboy' => 'PageBoy', 'parasite' => 'ParaSite', 'patric' => 'Patric', 'pegasus' => 'pegasus', 'perignator' => 'The Peregrinator', 'perlcrawler' => 'PerlCrawler 1.0', 'phantom' => 'Phantom', 'phpdig' => 'PhpDig', 'piltdownman' => 'PiltdownMan', 'pimptrain' => 'Pimptrain.com\'s robot', 'pioneer' => 'Pioneer', 'pitkow' => 'html_analyzer', 'pjspider' => 'Portal Juice Spider', 'plumtreewebaccessor' => 'PlumtreeWebAccessor', 'poppi' => 'Poppi', 'portalb' => 'PortalB Spider', 'psbot' => 'psbot', 'python' => 'Python-urllib', 'raven' => 'Raven Search', 'rbse' => 'RBSE Spider', 'resumerobot' => 'Resume Robot', 'rhcs' => 'RoadHouse Crawling System', 'road_runner' => 'Road Runner: The ImageScape Robot', 'robbie' => 'Robbie the Robot', 'robi' => 'ComputingSite Robi/1.0', 'robocrawl' => 'RoboCrawl Spider', 'robofox' => 'RoboFox', 'robozilla' => 'Robozilla', 'roverbot' => 'Roverbot', 'rules' => 'RuLeS', 'safetynetrobot' => 'SafetyNet Robot', 'search\-info' => 'Sleek', 'search_au' => 'Search.Aus-AU.COM', 'searchprocess' => 'SearchProcess', 'senrigan' => 'Senrigan', 'sgscout' => 'SG-Scout', 'shaggy' => 'ShagSeeker', 'shaihulud' => 'Shai\'Hulud', 'sift' => 'Sift', 'simbot' => 'Simmany Robot Ver1.0', 'site\-valet' => 'Site Valet', 'sitetech' => 'SiteTech-Rover', 'skymob' => 'Skymob.com', 'slcrawler' => 'SLCrawler', 'smartspider' => 'Smart Spider', 'snooper' => 'Snooper', 'solbot' => 'Solbot', 'speedy' => 'Speedy Spider', 'spider_monkey' => 
'spider_monkey', 'spiderbot' => 'SpiderBot', 'spiderline' => 'Spiderline Crawler', 'spiderman' => 'SpiderMan', 'spiderview' => 'SpiderView(tm)', 'spry' => 'Spry Wizard Robot', 'ssearcher' => 'Site Searcher', 'sqworm' => 'Sqworm', 'suke' => 'Suke', 'sunrise' => 'Sunrise', 'suntek' => 'suntek search engine', 'sven' => 'Sven', 'tach_bw' => 'TACH Black Widow', 'tagyu\sagent' => 'Tagyu Agent', 'tarantula' => 'Tarantula', 'tarspider' => 'tarspider', 'techbot' => 'TechBOT', 'templeton' => 'Templeton', 'titan' => 'TITAN', 'titin' => 'TitIn', 'tkwww' => 'The TkWWW Robot', 'tlspider' => 'TLSpider', 'ucsd' => 'UCSD Crawl', 'udmsearch' => 'UdmSearch', 'universalfeedparser' => 'UniversalFeedParser', 'urlck' => 'URL Check', 'valkyrie' => 'Valkyrie', 'verticrawl' => 'Verticrawl', 'victoria' => 'Victoria', 'visionsearch' => 'vision-search', 'voidbot' => 'void-bot', 'vwbot' => 'VWbot', 'w3index' => 'The NWI Robot', 'w3m2' => 'W3M2', 'wallpaper' => 'WallPaper (alias crawlpaper)', 'wanderer' => 'the World Wide Web Wanderer', 'wapspider' => 'w@pSpider by wap4.com', 'webbandit' => 'WebBandit Web Spider', 'webcatcher' => 'WebCatcher', 'webcopy' => 'WebCopy', 'webfetcher' => 'webfetcher', 'webfoot' => 'The Webfoot Robot', 'webinator' => 'Webinator', 'weblinker' => 'WebLinker', 'webmirror' => 'WebMirror', 'webmoose' => 'The Web Moose', 'webquest' => 'WebQuest', 'webreader' => 'Digimarc MarcSpider', 'webreaper' => 'WebReaper', 'websnarf' => 'Websnarf', 'webspider' => 'WebSpider', 'webvac' => 'WebVac', 'webwalk' => 'webwalk', 'webwalker' => 'WebWalker', 'webwatch' => 'WebWatch', 'whatuseek' => 'whatUseek Winona', 'whowhere' => 'WhoWhere Robot', 'wired\-digital' => 'Wired Digital', 'wmir' => 'w3mir', 'wolp' => 'WebStolperer', 'wombat' => 'The Web Wombat', 'wordpress' => 'WordPress', 'worm' => 'The World Wide Web Worm', 'wwwc' => 'WWWC Ver 0.2.5', 'wz101' => 'WebZinger', 'xget' => 'XGET', # Other robots reported by users '1\-more\sscanner' => '1-More Scanner', 'accoona\-ai\-agent' => 
'Accoona-AI-Agent', 'activebookmark' => 'ActiveBookmark', 'adamm\sbot' => 'AdamM Bot', 'almaden' => 'IBM Almaden Research Center WebFountain™', 'aipbot' => 'aipbot', 'aleadsoftbot' => 'ALeadSoftbot', 'alpha\ssearch\sagent' => 'Alpha Search Agent', 'aport' => 'Aport', 'archive\.org_bot' => 'archive.org bot', 'argus' => 'Argus', 'arianna\.libero\.it' => 'arianna.libero.it', 'aspseek' => 'ASPseek', 'asterias' => 'Asterias', 'awbot' => 'AWBot', 'baiduspider' => 'BaiDuSpider', 'becomebot' => 'BecomeBot', 'bender' => 'bender focused_crawler', 'biglotron' => 'Biglotron', 'bittorrent\sbot' => 'BitTorrent Bot', 'biz360\sspider' => 'Biz360 spider', 'blogbridge\sservice' => 'BlogBridge Service', 'bloglines' => 'Bloglines', 'blogpulse' => 'BlogPulse ISSpider intelliseek.com', 'blogsearch' => 'BlogSearch', 'blogshares' => 'Blogshares Spiders', 'blogslive' => 'Blogslive', 'blogssay' => 'BlogsSay :: RSS Search Crawler', 'bncf\.firenze\.sbn\.it\/raccolta\.txt' => 'Biblioteca Nazionale Centrale di Firenze', 'bobby' => 'Bobby', 'boitho\.com\-dc' => 'boitho.com-dc', 'bookmark\-manager' => 'Bookmark-Manager', 'boris' => 'Boris', 'bumblebee' => 'Bumblebee (relevare.com)', 'candlelight\_favorites\_inspector' => 'Candlelight_Favorites_Inspector', 'cbn00glebot' => 'cbn00glebot', 'cerberian\sdrtrs' => 'Cerberian Drtrs', 'cfnetwork' => 'CFNetwork', 'cipinetbot' => 'CipinetBot', 'checkweb\slink\svalidator' => 'CheckWeb link validator', 'commons\-httpclient' => 'Jakarta commons-httpclient', 'computer\sand\sautomation\sresearch\sinstitute\scrawler' => 'Computer and Automation Research Institute Crawler', 'converamultimediacrawler' => 'ConveraMultiMediaCrawler', 'converacrawler' => 'ConveraCrawler', 'cscrawler' => 'CsCrawler', 'cse\shtml\svalidator\slite\sonline' => 'CSE HTML Validator Lite Online','cuasarbot','Cuasarbot', 'cursor' => 'Cursor', 'custo' => 'Custo', 'datafountains\/dmoz\sdownloader' => 'DataFountains/DMOZ Downloader', 'daviesbot' => 'DaviesBot', 'daypopbot' => 'DayPop', 
'deepindex' => 'Deepindex', 'dipsie\.bot' => 'Dipsie', 'dnsgroup' => 'DNSGroup', 'docomo' => 'DoCoMo', 'domainchecker' => 'DomainChecker', 'domainsdb\.net' => 'DomainsDB.net', 'dulance' => 'Dulance', 'dumbot' => 'Dumbot', 'dumm\.de\-bot' => 'dumm.de-Bot', 'earthcom\.info' => 'EARTHCOM.info', 'easydl' => 'EasyDL', 'edgeio\-retriever' => 'edgeio-retriever', 'ets\sv' => 'ETS Enterprise Translation Server', 'exactseek' => 'ExactSeek Crawler', 'extreme\_picture\_finder' => 'Extreme_Picture_Finder', 'eventax' => 'eventax', 'everbeecrawler' => 'EverbeeCrawler', 'everest\-vulcan' => 'Everest-Vulcan', 'ezresult' => 'Ezresult', 'enteprise' => 'Fast Enteprise Crawler', 'fast\-search\-engine' => 'Fast-Search-Engine (not fastsearch.com)', 'fast\senterprise\scrawler' => 'FAST Enterprise Crawler', 'fast\senterprise\scrawler.*scrawleradmin\.t\-info@telekom\.de' => 'FAST Enterprise Crawler * crawleradmin.t-info@telekom.de', 'matrix\ss\.p\.a\.\s\-\sfast\senterprise\scrawler' => 'Matrix S.p.A. - FAST Enterprise Crawler', 'fast\senterprise\scrawler.*t\-info_bi_cluster\scrawleradmin\.t\-info@telekom\.de' => 'FAST Enterprise Crawler * T-Info_BI_cluster crawleradmin.t-info@telekom.de', 'favorg' => 'FavOrg', 'favorites\ssweeper' => 'Favorites Sweeper', 'feedburner' => 'Feedburner', 'feedfetcher\-google' => 'Feedfetcher-Google', 'feedflow' => 'FeedFlow', 'feedster' => 'Feedster', 'feedvalidator' => 'FeedValidator', 'filmkamerabot' => 'FilmkameraBot', 'findexa\scrawler' => 'Findexa Crawler', 'geniebot' => 'Geniebot', 'findlinks' => 'Findlinks', 'fooky\.com\/ScorpionBot' => 'Fooky.com/ScorpionBot/ScoutOut', 'g2crawler' => 'G2Crawler', 'gaisbot' => 'Gaisbot', 'gigabot' => 'GigaBot', 'girafabot' => 'Girafabot', 'global\sfetch' => 'Global Fetch', 'gnodspider' => 'GNOD Spider', 'goforit\.com' => 'GoForIt.com', 'goforitbot' => 'GOFORITBOT', 'gpu\sp2p\scrawler' => 'GPU p2p crawler', 'grub' => 'Grub.org', 'henrythemiragorobot' => 'Mirago', 'heritrix' => 'Heritrix', 'holmes' => 'Holmes', 'hoowwwer' 
=> 'HooWWWer', 'hpprint' => 'HPPrint', 'htmlparser' => 'HTMLParser', 'html\_link\_validator' => 'Html_Link_Validator', 'httrack' => 'HTTrack off-line browser', 'hundesuche\.com\-bot' => 'Hundesuche.com-Bot', 'ichiro' => 'ichiro', 'iltrovatore\-setaccio' => 'IlTrovatore-Setaccio', 'infobot' => 'InfoBot', 'infociousbot' => 'InfociousBot', 'infomine' => 'INFOMINE VLCrawler', 'insurancobot' => 'InsurancoBot', 'internet\_ninja' => 'Internet_Ninja ', 'internetarchive' => 'InternetArchive', 'internetseer' => 'InternetSeer', 'internetsupervision' => 'InternetSupervision', 'irlbot' => 'IRLbot', 'isearch2006' => 'isearch2006', 'iupui_research_bot' => 'IUPUI_Research_Bot', 'jrtwine\_software\_check\_favorites\_utility' => 'JRTwine_Software_Check_Favorites_Utility', 'justview' => 'JustView', 'kalambot' => 'KalamBot', 'kamano\.de\snewsfeedverzeichnis' => 'kamano.de NewsFeedVerzeichnis', 'kazoombot' => 'KazoomBot', 'kevin' => 'Kevin', 'keyoshid' => 'Yahoo! Japan keyoshid robot study', 'kinjabot' => 'Kinjabot', 'kinja\-imagebot' => 'Kinja Imagebot', 'knowitall' => 'KnowItAll', 'knowledge\.com' => 'Knowledge.com', 'kouaa\skrawler' => 'Kouaa Krawler', 'krugle' => 'Krugle', 'ksibot' => 'ksibot', 'kurzor' => 'Kurzor', 'lanshanbot' => 'lanshanbot', 'letscrawl\.com' => 'LetsCrawl.com', 'libcrawl' => 'Crawl libcrawl', 'link\svalet\sonline' => 'Link Valet Online', 'linkbot' => 'LinkBot', 'linkchecker' => 'LinkChecker', 'livejournal\.com' => 'LiveJournal.com', 'magpierss' => 'MagpieRSS', 'mapoftheinternet\.com' => 'MapoftheInternet.com', 'mediapartners\-google' => 'Google AdSense', 'megite' => 'Megite', 'metager\-linkchecker' => 'MetaGer LinkChecker', 'metaspinner' => 'Metaspinner', 'microsoft\surl\scontrol' => 'Microsoft URL Control', 'minirank' => 'miniRank', 'mini\-reptile' => 'Mini-reptile', 'missigua\slocator' => 'Missigua_Locator', 'misterbot' => 'Misterbot', 'miva' => 'Miva', 'mizzu\slabs' => 'Mizzu Labs', 'mj12bot' => 'MJ12bot', 'mojeekbot' => 'MojeekBot', 'tencenttraveler' => 
'TencentTraveler', # Must be before msiecrawler. 'msiecrawler' => 'MSIECrawler', 'ms\ssearch\s4\.0\srobot' => 'MS SharePoint Portal Server - MS Search 4.0 Robot', 'msrabot' => 'msrabot', 'msrbot' => 'MSRBOT', 'mt::telegraph::agent' => 'MT::Telegraph::Agent', 'mydoyouhike' => 'Mydoyouhike', 'nagios' => 'Nagios', 'nasa\ssearch' => 'NASA Search', 'netluchs' => 'Netluchs', 'netsprint' => 'NetSprint', 'newsgatoronline' => 'NewsGator Online', 'nicebot' => 'nicebot', 'nimblecrawler' => 'NimbleCrawler', 'noxtrumbot' => 'noxtrumbot', 'npbot' => 'NPBot', 'nutchcvs' => 'NutchCVS', 'nutchosu\-vlib' => 'NutchOSU-VLIB', 'nutch' => 'Nutch', 'ocelli' => 'Ocelli', 'octora\sbeta\sbot' => 'Octora Beta Bot', 'omniexplorer\_bot' => 'OmniExplorer Bot', 'onet\.pl\_sa' => 'Onet.pl_SA', 'onfolio' => 'Onfolio', 'opentaggerbot' => 'OpenTaggerBot', 'openwebspider' => 'OpenWebSpider', 'oracle\sultra\ssearch' => 'Oracle Ultra Search', 'orbiter' => 'Orbiter', 'outfoxbot' => 'OutfoxBot', 'passwordmaker\.org' => 'passwordmaker.org', 'pear\shttp\srequest\sclass' => 'PEAR HTTP Request class', 'peerbot' => 'PEERbot', 'perman' => 'Perman surfer', 'php\_version\_tracker' => 'PHP_version_tracker', 'php\sversion\stracker' => 'PHP version tracker', 'pictureofinternet' => 'PictureOfInternet', 'ping\.blo\.gs' => 'ping.blo.gs', 'plinki' => 'plinki', 'pluckfeedcrawler' => 'PluckFeedCrawler', 'pompos' => 'Pompos', 'popdexter' => 'Popdexter', 'port\shuron\slabs' => 'Port Huron Labs', 'postfavorites' => 'PostFavorites', 'projectwf\-java\-test\-crawler' => 'ProjectWF-java-test-crawler', 'proodlebot' => 'proodleBot', 'pyquery' => 'PyQuery', 'rambler' => 'StackRambler', 'redalert' => 'Red Alert', 'rojo' => 'RoJo aggregator', 'rssimagesbot' => 'rssImagesBot', 'ruffle' => 'ruffle SemanticWeb crawler', 'rufusbot' => 'RufusBot Rufus Web Miner', 'sandcrawler' => 'SandCrawler (Microsoft)', 'sbider' => 'SBIder', 'schizozilla' => 'Schizozilla', 'scumbot' => 'Scumbot', 'searchguild\_dmoz\_experiment' => 
'SearchGuild_DMOZ_Experiment', 'seekbot' => 'Seekbot', 'sensis\sweb\scrawler' => 'Sensis Web Crawler', 'seznambot' => 'SeznamBot', 'shim\-crawler' => 'Shim-Crawler', 'shoutcast' => 'Shoutcast Directory Service', 'slysearch' => 'SlySearch', 'snap\.com\sbeta\scrawler' => 'snap.com beta crawler', 'sogou\sspider' => 'sogou spider', 'sogou\stest' => 'sogou test', 'sohu\-search' => 'sohu-search', 'sohu' => 'sohu agent', 'snappy' => 'Snappy', 'sphere\sscout' => 'Sphere Scout', 'sproose\scrawler' => 'sproose crawler', 'steroid\s\sdownload' => 'STEROID Download', 'steeler' => 'Steeler', 'suchfin\-bot' => 'Suchfin-Bot', 'superbot' => 'SuperBot', 'surveybot' => 'SurveyBot', 'susie' => 'Susie', 'syndic8' => 'Syndic8', 'syndicapi' => 'SyndicAPI', 'synoobot' => 'SynooBot', 'tcl\shttp\sclient\spackage' => 'Tcl http client package', 'technoratibot' => 'Technoratibot', 'teragramcrawlersurf' => 'TeragramCrawlerSURF', 'test\scrawler' => 'Test Crawler', 'testbot' => 'TestBot', 't\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e' => 'T-H-U-N-D-E-R-S-T-O-N-E', 'topicblogs' => 'topicblogs', 'turnitinbot' => 'Turn It In', 'turtle' => 'Turtle', 'turtlescanner' => 'Turtle', 'tutorgigbot' => 'TutorGigBot', 'ubicrawler' => 'UbiCrawler', 'ultraseek' => 'Ultraseek', 'unchaos\sbot\shybrid\sweb\ssearch\sengine' => 'UnChaos Bot Hybrid Web Search Engine', 'unido\-bot' => 'unido-bot', 'up\.browser' => 'UP.Browser', 'updated' => 'updated', 'ustc\-semantic\-group' => 'USTC-Semantic-Group', 'vagabondo\-wap' => 'Vagabondo-WAP', 'vagabondo' => 'Vagabondo', 'vermut' => 'Vermut', 'versus\scrawler\sfrom\seda\.baykan@epfl\.ch' => 'versus crawler from eda.baykan@epfl.ch', 'vespa\scrawler' => 'Vespa Crawler', 'vortex' => 'VORTEX', 'vse' => 'VSE', 'w3c\-checklink' => 'W3C Link Checker', 'w3c\_css\_validator\_jfouffa' => 'W3C jigsaw CSS Validator', 'w3c_validator' => 'W3C Validator', 'wavefire' => 'Wavefire', 'webclipping\.com' => 'WebClipping.com', 'webcompass' => 'webcompass', 'webcrawl\.net' => 'webcrawl.net', 
'web\sdownloader' => 'Web Downloader', 'webdup' => 'Webdup', 'webfilter' => 'WebFilter', 'webindexer' => 'WebIndexer', 'webminer' => 'WebMiner', 'website\_monitoring\_bot' => 'Website_Monitoring_Bot', 'webvulncrawl' => 'WebVulnCrawl', 'wells\ssearch' => 'Wells Search', 'wonderer' => 'Web Wombat Redback Spider', 'wume\scrawler' => 'wume crawler', 'wwweasel' => 'WWWeasel', 'xenu\'s\slink\ssleuth' => 'Xenu Link Sleuth', 'xenu\slink\ssleuth' => 'Xenu Link Sleuth', 'xirq' => 'xirq', 'y!j' => 'Y!J Yahoo Japan', 'yacy' => 'yacy', 'yahoo\-blogs' => 'Yahoo-Blogs', 'yahoo\-verticalcrawler' => 'Yahoo Vertical Crawler', 'yahoofeedseeker' => 'Yahoo Feed Seeker', 'yahooseeker\-testing' => 'YahooSeeker-Testing', 'yahooseeker' => 'YahooSeeker Yahoo! Blog crawler', 'yahoo\-mmcrawler' => 'Yahoo-MMCrawler', 'yahoo!\smindset' => 'Yahoo! Mindset', 'yandex' => 'Yandex bot', 'yooglifetchagent' => 'yoogliFetchAgent', 'z\-add\slink\schecker' => 'Z-Add Link Checker', 'zealbot' => 'ZealBot', 'zspider' => 'zspider', 'zeus' => 'Zeus Webster Pro', 'ng\/1\.' => 'NG 1.x (Exalead)', # put at end to avoid false positive 'ng\/2\.' 
=> 'NG 2.x (Exalead)',    # kept near the end of the list to avoid false positives

    # These two are also kept near the end of the list to avoid false positives.
    'exabot' => 'Exabot',
    'java'   => 'Java (Often spam bot)',

    # Generic root IDs
    'robot'       => 'Unknown robot (identified by \'robot\')',
    'crawl'       => 'Unknown robot (identified by \'crawl\')',
    'spider'      => 'Unknown robot (identified by \'spider\')',
    '\wbot[\/\-]' => 'Unknown robot (identified by \'bot/\' or \'bot-\')',

    # Unknown robots, identified only by a hit on robots.txt
    'unknown' => 'Unknown robot (identified by hit on \'robots.txt\')'
);

# RobotsAffiliateLib
# This list tries to tell which search engine a given robot is used by.
# Each key is a robot ID written the same way as in the robot list above
# (for example 'googlebot' or '^voyager\/'); each value is the name of the
# search engine.
#-------------------------------------------------------------
$RobotsAffiliateLib = array(
    'fast\-webcrawler'       => 'AllTheWeb',
    'googlebot'              => 'Google',
    'msnbot'                 => 'MSN',
    'nutch'                  => 'Looksmart',
    'scooter'                => 'AltaVista',
    'wisenutbot'             => 'Looksmart',
    'yahoo\-verticalcrawler' => 'Yahoo',
    'zyborg'                 => 'Looksmart',
    'cfetch'                 => 'Kosmix',
    '^voyager\/'             => 'Kosmix'
);

# Bare expression statement with no effect on the arrays above.
# NOTE(review): looks like a Perl-style "true" module return value carried
# over into this file -- confirm before removing.
1;