11#!/usr/bin/env node
22
33var Crawler = require ( "simplecrawler" ) ;
4- var fs = require ( "fs" ) ;
54var program = require ( "commander" ) ;
65var chalk = require ( "chalk" ) ;
7- var request = require ( "request" ) ;
6+ var exec = require ( "child_process" ) . exec ;
7+ var _ = require ( "lodash" ) ;
8+ var Spinner = require ( "cli-spinner" ) . Spinner ;
89var pkg = require ( "./package.json" ) ;
910
// Command-line interface: `[options] <url>`, with an optional -q/--query
// flag. The parsed result is read later through `program.args` and
// `program.query`.
program.version(pkg.version);
program.usage("[options] <url>");
program.option("-q, --query", "consider query string");
program.parse(process.argv);
1315
1416if ( ! program . args [ 0 ] ) {
@@ -19,76 +21,72 @@ var chunk = [];
// Running totals printed in the final summary.
var count = 0;
var valid = 0;
var invalid = 0;

// Split the scheme off the user-supplied URL. The crawler is constructed
// from the scheme-less remainder, but the scheme itself must be remembered:
// previously it was discarded and every site was crawled as http on port 80,
// even when the user explicitly asked for https://.
var target = program.args[0];
var schemeMatch = target.match(/^(https?):\/\//i);
var isSecure = Boolean(schemeMatch) && schemeMatch[1].toLowerCase() === "https";

var url = target.replace(/^https?:\/\//i, "");
var c = new Crawler(url);

c.initialPath = "/";
c.initialPort = isSecure ? 443 : 80;
c.initialProtocol = isSecure ? "https" : "http";
c.userAgent = "Node/W3C-Validator";

// Without -q/--query, URLs that differ only in their query string are
// treated as the same page.
if (!program.query) {
  c.stripQuerystring = true;
}
36+
// File extensions that are never fetched.
// NOTE(review): the per-category filters this list replaces also excluded
// "vcf"; confirm whether dropping it was intentional.
var exclude = [
  "swf", "pdf", "ps", "dwf", "kml", "kmz", "gpx", "hwp", "ppt", "pptx",
  "doc", "docx", "odp", "ods", "odt", "rtf", "wri", "svg", "tex", "txt",
  "text", "wml", "wap", "xml", "gif", "jpg", "jpeg", "png", "ico", "bmp",
  "ogg", "webp", "mp4", "webm", "mp3", "ttf", "woff", "json", "rss", "atom",
  "gz", "zip", "rar", "7z", "css", "js", "gzip", "exe"
];

var exts = exclude.join("|");

// Two fixes over the previous pattern:
//  * The backslash is doubled. Inside a string literal "\." collapses to ".",
//    which made the dot match ANY character, so e.g. "/apps" was rejected
//    because "pps" looked like ".ps".
//  * The extension must not be followed by another letter or digit, so
//    ".js" no longer rejects ".jsp" pages. Anything else after the extension
//    (end of path, "?", "#", "/") still counts as a match.
var regex = new RegExp("\\.(" + exts + ")(?![a-z0-9])", "i");

// Skip every URL whose path carries one of the excluded extensions.
c.addFetchCondition(function (parsedURL) {
  return !regex.test(parsedURL.path);
});
45+
// Progress indicator displayed while links are being collected.
var spinner = new Spinner("Fetching links... %s");

// Begin animating as soon as the crawler reports that it has started.
c.on("crawlstart", function startSpinner() {
  spinner.start();
});
3251
// Record the URL of every item reported by "fetchcomplete" so it can be
// validated once the crawl is over.
c.on("fetchcomplete", function collectUrl(queueItem) {
  chunk.push(queueItem.url);
});
3655
// Crawl finished: clear the spinner, then either report that nothing was
// collected (exit code 1) or hand the collected URLs to the validator.
c.on("complete", function onCrawlComplete() {
  spinner.stop(true);
  count = chunk.length;

  if (_.isEmpty(chunk)) {
    console.error(chalk.red.bold("Error: Site '" + program.args[0] + "' could not be found."));
    process.exit(1);
  } else {
    console.log(chalk.white("Validating..."));
    checkURL(chunk);
  }
});
4768
/**
 * Validate the collected URLs one at a time with the checker at ./vnu/vnu.jar.
 *
 * Pops one URL off `chunk`, runs the validator on it, prints a ✓ / × line,
 * updates the `valid` / `invalid` counters and then calls itself for the
 * remaining URLs. Once `chunk` is empty it prints the summary and exits.
 *
 * @param {string[]} chunk - URLs still to check; consumed from the end
 *   (the array is mutated). Callers pass a non-empty array.
 */
var checkURL = function (chunk) {
  // execFile takes its arguments as an array and spawns no shell. The URL
  // comes from crawled page content, so interpolating it into a shell command
  // string (as exec requires) would allow command injection via characters
  // such as ";", "&" or "$(...)".
  var execFile = require("child_process").execFile;

  var url = chunk.pop();

  // NOTE(review): "./vnu/vnu.jar" is resolved against the current working
  // directory, not this script's directory — confirm that is intended.
  var args = ["-jar", "./vnu/vnu.jar", "--format", "json", url];

  // Reports for large pages can exceed the default output buffer, which would
  // truncate the JSON; allow up to 10 MiB.
  var options = { maxBuffer: 10 * 1024 * 1024 };

  execFile("java", args, options, function (error, stdout, stderr) {
    var result = null;
    var failure = null;

    // The JSON report is read from stderr. `error` is not treated as fatal on
    // its own because it is also set whenever the process exits non-zero.
    try {
      result = JSON.parse(stderr);
    } catch (parseError) {
      failure = error || parseError;
    }

    if (failure || !result) {
      // The validator did not produce a usable report (java or the jar
      // missing, truncated output, ...). Count the URL as invalid and keep
      // going instead of crashing the whole run.
      invalid++;
      console.error(
        chalk.red.bold("×", url),
        chalk.gray("(validator failed" + (failure ? ": " + failure.message : "") + ")")
      );
    } else if (_.isEmpty(result.messages)) {
      valid++;
      console.log(chalk.bold.green("✓"), chalk.gray(url));
    } else {
      invalid++;
      console.log(chalk.red.bold("×", url));
    }

    if (!_.isEmpty(chunk)) {
      checkURL(chunk);
    } else {
      console.log(chalk.white("Checked %s sites. %s valid, %s invalid."), count, valid, invalid);
      process.exit();
    }
  });
};
6991
70- var image = c . addFetchCondition ( function ( parsedURL ) {
71- return ! parsedURL . path . match ( / \. ( g i f | j p g | j p e g | p n g | i c o | b m p ) / i) ;
72- } ) ;
73-
74- var media = c . addFetchCondition ( function ( parsedURL ) {
75- return ! parsedURL . path . match ( / \. ( o g g | w e b p | m p 4 | w e b m | m p 3 ) / i) ;
76- } ) ;
77-
78- var font = c . addFetchCondition ( function ( parsedURL ) {
79- return ! parsedURL . path . match ( / \. ( t t f | w o f f ) $ / i) ;
80- } ) ;
81-
82- var data = c . addFetchCondition ( function ( parsedURL ) {
83- return ! parsedURL . path . match ( / \. ( j s o n | r s s | a t o m | g z | z i p | r a r | 7 z | v c f ) / i) ;
84- } ) ;
85-
86- var misc = c . addFetchCondition ( function ( parsedURL ) {
87- return ! parsedURL . path . match ( / \. ( c s s | j s | g z i p | e x e ) / i) ;
88- } ) ;
89-
90- var google = c . addFetchCondition ( function ( parsedURL ) {
91- return ! parsedURL . path . match ( / \. ( s w f | p d f | p s | d w f | k m l | k m z | g p x | h w p | p p t | p p t x | d o c | d o c x | o d p | o d s | o d t | r t f | w r i | s v g | t e x | t x t | t e x t | w m l | w a p | x m l ) / i) ;
92- } ) ;
93-
// Kick off the crawl; the event handlers registered on `c` take over from here.
c.start();
0 commit comments