11import { Flags } from '@oclif/core' ;
22import { NormalizedUrlSet } from '@autogram/url-tools' ;
3- import { CLI , Query , SgCommand , aql , HierarchyTools , TextTools } from '../../index.js' ;
3+ import {
4+ CLI ,
5+ Query ,
6+ SgCommand ,
7+ aql ,
8+ HierarchyTools ,
9+ TextTools ,
10+ } from '../../index.js' ;
411import { URL_WITH_COMMAS_REGEX } from 'crawlee' ;
512import { readFile } from 'fs/promises' ;
613import minimatch from 'minimatch' ;
@@ -61,7 +68,8 @@ export default class Urls extends SgCommand {
6168 } ) ,
6269 hide : Flags . string ( {
6370 summary : 'URLs matching this string will be hidden from view' ,
64- description : "Both --hide and --highlight use glob-style wildcards; '**/*cnn.com*' will match content on CNN or one of its domains; '**/news*' would only display the news directory and its descendents, and so on." ,
71+ description :
72+ "Both --hide and --highlight use glob-style wildcards; '**/*cnn.com*' will match content on CNN or one of its domains; '**/news*' would only display the news directory and its descendents, and so on." ,
6573 dependsOn : [ 'tree' ] ,
6674 required : false ,
6775 helpGroup : 'FORMAT' ,
@@ -109,11 +117,13 @@ export default class Urls extends SgCommand {
109117 } ) ,
110118 } ;
111119
112- static args = [ {
113- name : 'input' ,
114- description : 'A database collection, local filename, or remote URL' ,
115- default : 'resources'
116- } ]
120+ static args = [
121+ {
122+ name : 'input' ,
123+ description : 'A database collection, local filename, or remote URL' ,
124+ default : 'resources' ,
125+ } ,
126+ ] ;
117127
118128 async run ( ) {
119129 const { args, flags } = await this . parse ( Urls ) ;
@@ -126,13 +136,13 @@ export default class Urls extends SgCommand {
126136
127137 if ( isParsableUrl ( args . input ) ) {
128138 const responseData = await fetch ( new URL ( args . input ) )
129- . then ( response => response . text ( ) )
139+ . then ( response => response . text ( ) )
130140 . catch ( reason => {
131141 if ( reason instanceof Error ) this . error ( reason . message ) ;
132- else this . error ( " An error occurred loading the URL." ) ;
142+ else this . error ( ' An error occurred loading the URL.' ) ;
133143 } ) ;
134- rawUrls = responseData . match ( URL_WITH_COMMAS_REGEX ) || [ ] ;
135- } else if ( args . input . indexOf ( '.' ) !== - 1 ) {
144+ rawUrls = responseData . match ( URL_WITH_COMMAS_REGEX ) || [ ] ;
145+ } else if ( args . input . indexOf ( '.' ) !== - 1 ) {
136146 const urlFile = await readFile ( args . input )
137147 . then ( buffer => buffer . toString ( ) )
138148 . catch ( ( ) => this . error ( `File ${ args . input } couldn't be opened` ) ) ;
@@ -177,10 +187,16 @@ export default class Urls extends SgCommand {
177187 summary [ 'Hidden URLs' ] = rawUrls . length - filteredUrls . length ;
178188 }
179189 if ( urls . unparsable . size ) {
180- summary [ 'Unparsable Urls' ] = flags . unparsable ? [ ...urls . unparsable ] : urls . unparsable . size ;
190+ summary [ 'Unparsable Urls' ] = flags . unparsable
191+ ? [ ...urls . unparsable ]
192+ : urls . unparsable . size ;
181193 }
182- if ( ( urls . size - webUrls . length ) > 0 ) {
183- summary [ 'Non-Web URLs' ] = flags . nonweb ? [ ...urls ] . filter ( url => ! [ 'https:' , 'http:' ] . includes ( url . protocol ) ) . map ( url => url . href ) : urls . size - webUrls . length ;
194+ if ( urls . size - webUrls . length > 0 ) {
195+ summary [ 'Non-Web URLs' ] = flags . nonweb
196+ ? [ ...urls ]
197+ . filter ( url => ! [ 'https:' , 'http:' ] . includes ( url . protocol ) )
198+ . map ( url => url . href )
199+ : urls . size - webUrls . length ;
184200 }
185201
186202 const output : string [ ] = [ ] ;
@@ -222,11 +238,15 @@ export default class Urls extends SgCommand {
222238 } ;
223239 }
224240
225- const hierarchy = new HierarchyTools . UrlHierarchyBuilder ( treeOptions ) . add ( webUrls ) ;
241+ const hierarchy = new HierarchyTools . UrlHierarchyBuilder ( treeOptions ) . add (
242+ webUrls ,
243+ ) ;
226244 const orphans = hierarchy . items . filter ( item => item . isOrphan ) . length ;
227245 if ( orphans > 0 ) {
228246 if ( flags . orphans ) {
229- summary [ 'Orphaned URLs' ] = hierarchy . items . filter ( item => item . isOrphan ) . map ( orphan => orphan . data . url . toString ( ) ) ;
247+ summary [ 'Orphaned URLs' ] = hierarchy . items
248+ . filter ( item => item . isOrphan )
249+ . map ( orphan => orphan . data . url . toString ( ) ) ;
230250 } else {
231251 summary [ 'Orphaned URLs' ] = orphans ;
232252 }
@@ -238,9 +258,13 @@ export default class Urls extends SgCommand {
238258 summaryLines . push ( '# URL Summary' ) ;
239259 for ( const [ bullet , content ] of Object . entries ( summary ) ) {
240260 if ( typeof content === 'number' ) {
241- summaryLines . push ( `- **${ bullet } **: ${ content . toLocaleString ( ) . trim ( ) } ` ) ;
261+ summaryLines . push (
262+ `- **${ bullet } **: ${ content . toLocaleString ( ) . trim ( ) } ` ,
263+ ) ;
242264 } else {
243- summaryLines . push ( `- **${ bullet } **: ${ TextTools . joinOxford ( content ) . trim ( ) } ` ) ;
265+ summaryLines . push (
266+ `- **${ bullet } **: ${ TextTools . joinOxford ( content ) . trim ( ) } ` ,
267+ ) ;
244268 }
245269 }
246270 output . push ( summaryLines . join ( '\n' ) ) ;
@@ -268,4 +292,4 @@ function isParsableUrl(input: string) {
268292 } catch {
269293 return false ;
270294 }
271- }
295+ }
0 commit comments