Skip to content

Commit c37d689

Browse files
authored
Merge pull request #324 from line-o/better-get
Better get
2 parents 5917639 + 5461f97 commit c37d689

File tree

5 files changed

+101
-29
lines changed

5 files changed

+101
-29
lines changed

commands/get.js

Lines changed: 52 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { statSync, existsSync, mkdirSync } from 'node:fs'
33
import { writeFile } from 'node:fs/promises'
44
import { connect } from '@existdb/node-exist'
55
import Bottleneck from 'bottleneck'
6+
import { getGlobMatcher } from '../utility/glob.js'
67

78
/**
89
* @typedef { import("@existdb/node-exist").NodeExist } NodeExist
@@ -30,15 +31,6 @@ import Bottleneck from 'bottleneck'
3031
* @prop {String[]} exclude filter items
3132
*/
3233

33-
const stringList = {
34-
type: 'string',
35-
array: true,
36-
coerce: (values) =>
37-
values.length === 1 && values[0].trim() === 'false'
38-
? ['**']
39-
: values.reduce((values, value) => values.concat(value.split(',').map((value) => value.trim())), [])
40-
}
41-
4234
const xmlBooleanOptionValue = new Map([
4335
['true', 'yes'],
4436
['yes', 'yes'],
@@ -60,7 +52,11 @@ const xmlBooleanSetting = {
6052
return xmlBooleanOptionValue.get(value)
6153
}
6254
}
63-
const serializationOptionNames = ['insert-final-newline', 'omit-xml-declaration', 'expand-xincludes']
55+
const serializationOptionNames = ['insert-final-newline', 'omit-xml-declaration', 'expand-xincludes', 'method']
56+
57+
const htmlSerializationMethod = {
58+
method: 'html'
59+
}
6460

6561
const serializationDefaults = {
6662
'expand-xincludes': 'yes'
@@ -69,15 +65,26 @@ const serializationDefaults = {
6965
// "output.indent": "no",
7066
// "compression": "yes"
7167
}
68+
/**
 * Build the serialization options for resources that are read as HTML.
 *
 * Uses the same defaults and user-supplied overrides as
 * getSerializationOptions, then forces the entries of htmlSerializationMethod
 * on top, so a "method" value passed in options never wins for these resources.
 *
 * @param {Object} options parsed command options
 * @returns {Object} a fresh serialization options object
 */
function getHtmlSerializationOptions (options) {
  // spread into a new object so neither the shared defaults nor the
  // html method constant are modified
  return { ...getSerializationOptions(options), ...htmlSerializationMethod }
}
7279

7380
/**
 * Build the serialization options passed along when reading a resource.
 *
 * Starts from a copy of serializationDefaults and overlays every option
 * named in serializationOptionNames that is present on the parsed options.
 *
 * @param {Object} options parsed command options
 * @returns {Object} a fresh serialization options object
 */
function getSerializationOptions (options) {
  // copy first: writing into serializationDefaults directly would leak
  // the overrides of one call into every later call
  const serializationOptions = { ...serializationDefaults }
  for (const name of serializationOptionNames) {
    if (name in options) {
      serializationOptions[name] = options[name]
    }
  }
  return serializationOptions
}
8390

@@ -91,12 +98,20 @@ function getSerializationOptions (options) {
9198
*/
9299
async function downloadResource (db, options, resource, directory, collection, rename) {
93100
try {
94-
const { verbose } = options
101+
const { verbose, matchesExcludeGlob, matchesIncludeGlob, matchesHtmlGlob } = options
95102
let fileContents
96103
const path = collection ? posix.join(collection, resource.name) : resource.name
104+
if (matchesExcludeGlob(resource) || !matchesIncludeGlob(resource)) {
105+
if (verbose) {
106+
console.log(`- skipping resource ${path}`)
107+
}
108+
return true
109+
}
97110

98111
if (resource.type === 'BinaryResource') {
99112
fileContents = await db.documents.readBinary(path)
113+
} else if (matchesHtmlGlob(resource)) {
114+
fileContents = await db.documents.read(path, getHtmlSerializationOptions(options))
100115
} else {
101116
fileContents = await db.documents.read(path, getSerializationOptions(options))
102117
}
@@ -202,16 +217,16 @@ async function getPathInfo (db, path) {
202217
*/
203218
async function downloadCollectionOrResource (db, source, target, options) {
204219
// read parameters
205-
// const start = Date.now()
220+
// const start = Date.now()
206221
const root = resolve(target)
207222

208223
if (options.verbose) {
209-
console.error('Downloading:', source, 'to', root)
210-
if (options.include.length > 1 || options.include[0] !== '**') {
211-
console.error('Include:\n', ...options.include, '\n')
224+
console.error('Downloading', source, 'to', root)
225+
if (options.include !== '**') {
226+
console.error('Include', options.include)
212227
}
213-
if (options.exclude.length) {
214-
console.error('Exclude:\n', ...options.exclude, '\n')
228+
if (options.exclude && options.exclude.length) {
229+
console.error('Exclude', options.exclude)
215230
}
216231
console.error(`Downloading up to ${options.threads} resources at a time`)
217232
if (options['expand-xincludes'] === 'false') {
@@ -288,15 +303,15 @@ export function builder (yargs) {
288303
yargs
289304
.option('i', {
290305
alias: 'include',
291-
describe: 'Include only files matching one or more of include patterns (comma separated)',
306+
describe: 'Include only files matching the include globbing pattern',
292307
default: '**',
293-
...stringList
308+
type: 'string'
294309
})
295310
.option('e', {
296311
alias: 'exclude',
297-
describe: 'Exclude any file matching one or more of exclude patterns (comma separated)',
298-
default: [],
299-
...stringList
312+
describe: 'Exclude any file matching the exclude globbing pattern',
313+
default: '',
314+
type: 'string'
300315
})
301316
.option('x', {
302317
group: 'serialization',
@@ -316,6 +331,13 @@ export function builder (yargs) {
316331
describe: 'Force a final newline at the end of an XMLResource (requires eXist >=6.1.0)',
317332
...xmlBooleanSetting
318333
})
334+
.option('H', {
335+
group: 'serialization',
336+
alias: 'serialize-as-html',
337+
describe: 'Serialize resources that match the globbing pattern as HTML',
338+
default: '*.html',
339+
type: 'string'
340+
})
319341
.option('v', {
320342
alias: 'verbose',
321343
describe: 'Log every file and resource that was created',
@@ -342,7 +364,11 @@ export async function handler (argv) {
342364
return 0
343365
}
344366

345-
const { threads, mintime, source } = argv
367+
const { threads, mintime, source, include, exclude, serializeAsHtml } = argv
368+
369+
const matchesIncludeGlob = getGlobMatcher(include)
370+
const matchesExcludeGlob = getGlobMatcher(exclude)
371+
const matchesHtmlGlob = getGlobMatcher(serializeAsHtml)
346372

347373
if (typeof mintime !== 'number' || mintime < 0) {
348374
throw Error('Invalid value for option "mintime"; must be an integer equal or greater than zero.')
@@ -357,5 +383,5 @@ export async function handler (argv) {
357383
const version = await db.server.version()
358384
argv.version = version
359385

360-
return await downloadCollectionOrResource(db, source, target, argv)
386+
return await downloadCollectionOrResource(db, source, target, { ...argv, matchesIncludeGlob, matchesExcludeGlob, matchesHtmlGlob })
361387
}

spec/fixtures/five.html

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
<!DOCTYPE html>
2+
<html>
3+
<head>
4+
<title>Sample HTML Document</title>
5+
</head>
6+
<body>
7+
<h1>Welcome to My Sample HTML Document</h1>
8+
<p>This is a paragraph of text in the body of the HTML document.</p>
9+
<x-custom-tag></x-custom-tag>
10+
</body>
11+
<script><![CDATA[ console.log(1<2); ]]></script>
12+
</html>

spec/tests/get.js

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ test('with test collection', async (t) => {
173173
st.plan(9)
174174
const verboseLines = stderr.split('\n')
175175
st.equal(verboseLines[0], 'Connecting to https://localhost:8443 as admin', verboseLines[0])
176-
st.ok(verboseLines[1].startsWith('Downloading: /db/get-test to'), verboseLines[1])
176+
st.ok(verboseLines[1].startsWith('Downloading /db/get-test to'), verboseLines[1])
177177
st.equal(verboseLines[2], 'Downloading up to 4 resources at a time', verboseLines[2])
178178
st.equal(verboseLines[3], '', verboseLines[3])
179179
st.equal(verboseLines.length, 4, 'all expected lines in verbose output')
@@ -222,5 +222,29 @@ test('with test collection', async (t) => {
222222
await run('rm', ['-rf', additionalTestDirectory])
223223
})
224224

225+
const globTestDirectory = 'glob-test'
226+
t.test('Downloading only txt files matching glob but exclude a1.txt', async (st) => {
227+
await run('mkdir', [globTestDirectory])
228+
const { stderr, stdout } = await run(
229+
'xst',
230+
['get', testCollection, globTestDirectory, '--include', '*.txt', '--exclude', 'a1.txt'],
231+
asAdmin
232+
)
233+
if (stderr) {
234+
st.fail(stderr)
235+
return st.end()
236+
}
237+
st.plan(2)
238+
239+
st.notOk(stdout, 'no output')
240+
st.deepEqual(
241+
readdirSync(`${globTestDirectory}/${testCollectionName}`),
242+
['a.txt', 'a20.txt', 'empty-subcollection', 'subcollection'],
243+
'only .txt files (except a1.txt) were downloaded'
244+
)
245+
await run('rm', ['-rf', globTestDirectory])
246+
st.end()
247+
})
248+
225249
t.teardown(tearDown)
226250
})

spec/tests/list.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,7 @@ test('with fixtures uploaded', async (t) => {
257257
/db/list-test/fixtures/binding.json
258258
/db/list-test/fixtures/broken-test-app.xar
259259
/db/list-test/fixtures/connection.xstrc
260+
/db/list-test/fixtures/five.html
260261
/db/list-test/fixtures/test-app.xar
261262
/db/list-test/fixtures/test-lib.xar
262263
/db/list-test/fixtures/test.xml
@@ -293,6 +294,7 @@ test('with fixtures uploaded', async (t) => {
293294
/db/list-test/fixtures/test-app.xar
294295
/db/list-test/fixtures/test-lib.xar
295296
/db/list-test/fixtures/broken-test-app.xar
297+
/db/list-test/fixtures/five.html
296298
/db/list-test/fixtures/test.xml
297299
/db/list-test/fixtures/web-no-rest.xml
298300
/db/list-test/tests

utility/glob.js

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ export function toRegExpPattern (glob) {
88
.replace(/\\/g, '\\\\') // escape backslashes
99
.replace(/\./g, '\\.') // make . literals
1010
.replace(/\?/g, '.') // transform ?
11-
.replace(/\*/g, '.*?') // transform *
11+
.replace(/\*\*/g, '.*?') // transform **
12+
.replace(/\*/g, '[^/]*?') // transform *
1213

1314
return `^${converted}$`
1415
}
@@ -19,6 +20,13 @@ export function toRegExpPattern (glob) {
1920
* @returns {(item:ListResultItem) => Boolean}
2021
*/
2122
export function getGlobMatcher (glob) {
  // no pattern (null, undefined, empty string or empty list) matches nothing
  if (glob == null || glob.length === 0) {
    return () => false
  }
  // the match-everything pattern needs no regular expression at all
  if (glob === '**') {
    return () => true
  }
  // NOTE(review): toRegExpPattern rewrites "*" after it has rewritten "**";
  // confirm that globs combining "**" with other characters (e.g. "**/*.xml")
  // still produce a valid regular expression.
  const pattern = toRegExpPattern(glob)
  // matching is case-insensitive and tests only the item's name
  const regex = new RegExp(pattern, 'i')
  return (item) => regex.test(item.name)
}

0 commit comments

Comments
 (0)