Skip to content

Commit 5b28033

Browse files
committed
It's always after you publish, isn't it? Formatting cleanup in src/cli/commands/urls.ts and a version bump to 0.8.3. Also bumps the Node engine version in package.json (^18.0.0 → ^18.1.0) to avoid warnings about Node 18's new fetch API.
1 parent 6c6a3e5 commit 5b28033

File tree

2 files changed

+45
-21
lines changed

2 files changed

+45
-21
lines changed

package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
{
22
"name": "spidergram",
3-
"version": "0.8.2",
3+
"version": "0.8.3",
44
"description": "Structural analysis tools for complex web sites",
55
"main": "./dist/index.js",
66
"exports": "./dist/index.js",
77
"types": "./dist/index.d.ts",
88
"type": "module",
99
"engines": {
10-
"node": "^18.0.0"
10+
"node": "^18.1.0"
1111
},
1212
"bin": {
1313
"spidergram": "./bin/run.js"

src/cli/commands/urls.ts

Lines changed: 43 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
import { Flags } from '@oclif/core';
22
import { NormalizedUrlSet } from '@autogram/url-tools';
3-
import { CLI, Query, SgCommand, aql, HierarchyTools, TextTools } from '../../index.js';
3+
import {
4+
CLI,
5+
Query,
6+
SgCommand,
7+
aql,
8+
HierarchyTools,
9+
TextTools,
10+
} from '../../index.js';
411
import { URL_WITH_COMMAS_REGEX } from 'crawlee';
512
import { readFile } from 'fs/promises';
613
import minimatch from 'minimatch';
@@ -61,7 +68,8 @@ export default class Urls extends SgCommand {
6168
}),
6269
hide: Flags.string({
6370
summary: 'URLs matching this string will be hidden from view',
64-
description: "Both --hide and --highlight use glob-style wildcards; '**/*cnn.com*' will match content on CNN or one of its domains; '**/news*' would only display the news directory and its descendents, and so on.",
71+
description:
72+
"Both --hide and --highlight use glob-style wildcards; '**/*cnn.com*' will match content on CNN or one of its domains; '**/news*' would only display the news directory and its descendents, and so on.",
6573
dependsOn: ['tree'],
6674
required: false,
6775
helpGroup: 'FORMAT',
@@ -109,11 +117,13 @@ export default class Urls extends SgCommand {
109117
}),
110118
};
111119

112-
static args = [{
113-
name: 'input',
114-
description: 'A database collection, local filename, or remote URL',
115-
default: 'resources'
116-
}]
120+
static args = [
121+
{
122+
name: 'input',
123+
description: 'A database collection, local filename, or remote URL',
124+
default: 'resources',
125+
},
126+
];
117127

118128
async run() {
119129
const { args, flags } = await this.parse(Urls);
@@ -126,13 +136,13 @@ export default class Urls extends SgCommand {
126136

127137
if (isParsableUrl(args.input)) {
128138
const responseData = await fetch(new URL(args.input))
129-
.then(response => response.text() )
139+
.then(response => response.text())
130140
.catch(reason => {
131141
if (reason instanceof Error) this.error(reason.message);
132-
else this.error("An error occurred loading the URL.");
142+
else this.error('An error occurred loading the URL.');
133143
});
134-
rawUrls = responseData.match(URL_WITH_COMMAS_REGEX) || [];
135-
} else if (args.input.indexOf('.') !== -1) {
144+
rawUrls = responseData.match(URL_WITH_COMMAS_REGEX) || [];
145+
} else if (args.input.indexOf('.') !== -1) {
136146
const urlFile = await readFile(args.input)
137147
.then(buffer => buffer.toString())
138148
.catch(() => this.error(`File ${args.input} couldn't be opened`));
@@ -177,10 +187,16 @@ export default class Urls extends SgCommand {
177187
summary['Hidden URLs'] = rawUrls.length - filteredUrls.length;
178188
}
179189
if (urls.unparsable.size) {
180-
summary['Unparsable Urls'] = flags.unparsable ? [...urls.unparsable] : urls.unparsable.size;
190+
summary['Unparsable Urls'] = flags.unparsable
191+
? [...urls.unparsable]
192+
: urls.unparsable.size;
181193
}
182-
if ((urls.size - webUrls.length) > 0) {
183-
summary['Non-Web URLs'] = flags.nonweb ? [...urls].filter(url => !['https:', 'http:'].includes(url.protocol)).map(url => url.href) : urls.size - webUrls.length;
194+
if (urls.size - webUrls.length > 0) {
195+
summary['Non-Web URLs'] = flags.nonweb
196+
? [...urls]
197+
.filter(url => !['https:', 'http:'].includes(url.protocol))
198+
.map(url => url.href)
199+
: urls.size - webUrls.length;
184200
}
185201

186202
const output: string[] = [];
@@ -222,11 +238,15 @@ export default class Urls extends SgCommand {
222238
};
223239
}
224240

225-
const hierarchy = new HierarchyTools.UrlHierarchyBuilder(treeOptions).add(webUrls);
241+
const hierarchy = new HierarchyTools.UrlHierarchyBuilder(treeOptions).add(
242+
webUrls,
243+
);
226244
const orphans = hierarchy.items.filter(item => item.isOrphan).length;
227245
if (orphans > 0) {
228246
if (flags.orphans) {
229-
summary['Orphaned URLs'] = hierarchy.items.filter(item => item.isOrphan).map(orphan => orphan.data.url.toString());
247+
summary['Orphaned URLs'] = hierarchy.items
248+
.filter(item => item.isOrphan)
249+
.map(orphan => orphan.data.url.toString());
230250
} else {
231251
summary['Orphaned URLs'] = orphans;
232252
}
@@ -238,9 +258,13 @@ export default class Urls extends SgCommand {
238258
summaryLines.push('# URL Summary');
239259
for (const [bullet, content] of Object.entries(summary)) {
240260
if (typeof content === 'number') {
241-
summaryLines.push(`- **${bullet}**: ${content.toLocaleString().trim()}`);
261+
summaryLines.push(
262+
`- **${bullet}**: ${content.toLocaleString().trim()}`,
263+
);
242264
} else {
243-
summaryLines.push(`- **${bullet}**: ${TextTools.joinOxford(content).trim()}`);
265+
summaryLines.push(
266+
`- **${bullet}**: ${TextTools.joinOxford(content).trim()}`,
267+
);
244268
}
245269
}
246270
output.push(summaryLines.join('\n'));
@@ -268,4 +292,4 @@ function isParsableUrl(input: string) {
268292
} catch {
269293
return false;
270294
}
271-
}
295+
}

0 commit comments

Comments
 (0)