Skip to content

Commit 34a8b29

Browse files
committed
Use hand-coded lexer requiring SPDX list packages
Prior to this, `npm prepublish` generated both lexer and parser using Jison, hard-coding the versions of `spdx-license-ids` and `spdx-exceptions` installed as `devDependencies` on the publisher's machine into the package. There were no `dependencies`. This commit adds a hand-coded lexer, and uses Jison to generate only the parser. As a result, the SPDX list-data packages become regular `dependencies`, `npm install` can install the latest versions, and the lexer can `require()` them. This will help ensure that those relying on `spdx-expression-parse` get the latest SPDX lists, without waiting for `spdx-expression-parse` to republish.
1 parent 9dc6d08 commit 34a8b29

File tree

5 files changed

+124
-51
lines changed

5 files changed

+124
-51
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ The syntax comes from the [Software Package Data eXchange (SPDX)](https://spdx.o
3838

3939
The bulk of the SPDX standard describes syntax and semantics of XML metadata files. This package implements two lightweight, plain-text components of that larger standard:
4040

41-
1. The [license list](https://spdx.org/licenses), a mapping from specific string identifiers, like `Apache-2.0`, to standard form license texts and bolt-on license exceptions. The [spdx-license-ids](https://www.npmjs.com/package/spdx-license-ids) and [spdx-exceptions](https://www.npmjs.com/package/spdx-exceptions) packages implement the license list. They are development dependencies of this package.
41+
1. The [license list](https://spdx.org/licenses), a mapping from specific string identifiers, like `Apache-2.0`, to standard form license texts and bolt-on license exceptions. The [spdx-license-ids](https://www.npmjs.com/package/spdx-license-ids) and [spdx-exceptions](https://www.npmjs.com/package/spdx-exceptions) packages implement the license list. They are dependencies of this package.
4242

4343
Any license identifier from the license list is a valid license expression:
4444

generate-parser.js

Lines changed: 0 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -7,51 +7,7 @@ var options = {
77

88
var words = ['AND', 'OR', 'WITH']
99

10-
var quote = function (argument) {
11-
return '\'' + argument + '\''
12-
}
13-
14-
var regexEscape = function (s) {
15-
return s.replace(/[\^\\$*+?.()|{}[]\/]/g, '\\$&')
16-
}
17-
18-
var handleLicensesAndExceptions = function () {
19-
var ids = require('spdx-license-ids')
20-
var exceptions = require('spdx-exceptions')
21-
22-
// Sort tokens longest-first (both license ids and exception strings)
23-
var tokens = ids.concat(exceptions)
24-
tokens.sort(function (a, b) { return b.length - a.length })
25-
return tokens.map(function (t) {
26-
var type = (ids.indexOf(t) >= 0) ? 'LICENSE' : 'EXCEPTION'
27-
return [regexEscape(t), 'return ' + quote(type)]
28-
})
29-
}
30-
3110
var grammar = {
32-
lex: {
33-
macros: {},
34-
rules: [
35-
['$', 'return ' + quote('EOS')],
36-
['\\s+', '/* skip whitespace */'],
37-
['\\+', 'return ' + quote('PLUS')],
38-
['\\(', 'return ' + quote('OPEN')],
39-
['\\)', 'return ' + quote('CLOSE')],
40-
[':', 'return ' + quote('COLON')],
41-
[
42-
'DocumentRef-([0-9A-Za-z-+.]+)',
43-
'return ' + quote('DOCUMENTREF')
44-
],
45-
[
46-
'LicenseRef-([0-9A-Za-z-+.]+)',
47-
'return ' + quote('LICENSEREF')
48-
]
49-
]
50-
.concat(words.map(function (word) {
51-
return [word, 'return ' + quote(word)]
52-
}))
53-
.concat(handleLicensesAndExceptions())
54-
},
5511
operators: [
5612
['left', 'OR'],
5713
['left', 'AND'],

index.js

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1-
var parser = require('./parser').parser
1+
var Scanner = require('./scanner')
2+
var Parser = require('./parser').Parser
23

3-
module.exports = function (argument) {
4-
return parser.parse(argument)
4+
module.exports = function (string) {
5+
var parser = new Parser()
6+
parser.lexer = new Scanner()
7+
return parser.parse(string)
58
}

package.json

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,17 @@
66
"files": [
77
"AUTHORS",
88
"index.js",
9-
"parser.js"
9+
"parser.js",
10+
"scanner.js"
1011
],
12+
"dependencies": {
13+
"spdx-exceptions": "^2.0.0",
14+
"spdx-license-ids": "^2.0.1"
15+
},
1116
"devDependencies": {
1217
"defence-cli": "^2.0.1",
1318
"jison": "^0.4.15",
1419
"replace-require-self": "^1.0.0",
15-
"spdx-exceptions": "^2.0.0",
16-
"spdx-license-ids": "^2.0.1",
1720
"standard": "^10.0.2"
1821
},
1922
"keywords": [

scanner.js

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
var ids = require('spdx-license-ids')
2+
var exceptions = require('spdx-exceptions')
3+
4+
module.exports = function () {
5+
this.setInput = function (string) {
6+
this.tokens = lex(string)
7+
}
8+
this.lex = function () {
9+
var token = this.tokens.shift()
10+
this.yylineno = 1
11+
this.yytext = token.string
12+
this.yyloc = {
13+
first_line: 1,
14+
last_line: 1,
15+
first_column: token.start,
16+
last_column: token.end
17+
}
18+
return token.type
19+
}
20+
}
21+
22+
// A whole buffered token of the form `DocumentRef-<idstring>`.
var DOCUMENTREF = /^DocumentRef-([0-9A-Za-z-+.]+)$/
// A whole buffered token of the form `LicenseRef-<idstring>`.
var LICENSEREF = /^LicenseRef-([0-9A-Za-z-+.]+)$/
// Matches any character that may not appear anywhere in an expression.
// The permitted set must include every entry of SINGLE_CHARACTER_TOKENS;
// ':' is permitted so that `DocumentRef-x:LicenseRef-y` reaches the
// tokenizer instead of being rejected up front as an invalid character.
var INVALID_CHARACTER = /[^ 0-9A-Za-z.+\-():]/
// Characters that always form a token by themselves and terminate any
// token being buffered.
var SINGLE_CHARACTER_TOKENS = ['(', ')', ':', '+']
26+
27+
var includes = Array.prototype.includes
28+
? function (array, element) {
29+
return array.includes(element)
30+
}
31+
: function (array, element) {
32+
return array.indexOf(element) !== -1
33+
}
34+
35+
// Split an expression string into an array of token records of the form
// { type, string, start, end }, where `start`/`end` are character offsets
// into the input and the final record is always an 'EOS' token.
//
// Throws Error('Invalid character') if the input contains a character
// matched by INVALID_CHARACTER; tokenTypeForString throws for any token
// it cannot classify.
function lex (argument) {
  // Reject the input as a whole before tokenizing anything.
  if (INVALID_CHARACTER.test(argument)) {
    throw new Error('Invalid character')
  }

  var result = []
  var pending = ''
  var pendingStart = null

  // Emit the buffered run of characters (if any) as a single token and
  // reset the buffer.
  function flush () {
    if (!pending) {
      return
    }
    result.push({
      type: tokenTypeForString(pending, pendingStart),
      string: pending,
      start: pendingStart,
      end: pendingStart + pending.length
    })
    pending = ''
    pendingStart = null
  }

  var total = argument.length
  for (var index = 0; index < total; index++) {
    var current = argument[index]
    var isSpace = current === ' '

    // Ordinary character: extend (or start) the buffered token.
    if (!isSpace && !includes(SINGLE_CHARACTER_TOKENS, current)) {
      if (pendingStart === null) {
        pendingStart = index
      }
      pending += current
      continue
    }

    // A space or a single-character token ends whatever was buffered.
    flush()
    if (!isSpace) {
      result.push({
        type: tokenTypeForString(current, index),
        string: current,
        start: index,
        end: index + 1
      })
    }
  }

  // Emit any trailing buffered token, then the end-of-input marker.
  flush()
  result.push({
    type: 'EOS',
    string: '',
    start: total,
    end: total
  })
  return result
}
84+
85+
// Classify one token string and return its token type name.
//
// Checks run in this order: known license identifier, the keywords
// AND / OR / WITH (returned as their own type), known exception
// identifier, LicenseRef-*, DocumentRef-*, then the single-character
// punctuation tokens.
//
// `start` is the token's offset in the input and is used only to build
// the error message. Throws an Error when the string matches nothing.
function tokenTypeForString (string, start) {
  if (ids.indexOf(string) !== -1) {
    return 'LICENSE'
  }

  switch (string) {
    case 'AND':
    case 'OR':
    case 'WITH':
      return string
  }

  if (exceptions.indexOf(string) !== -1) {
    return 'EXCEPTION'
  }
  if (LICENSEREF.test(string)) {
    return 'LICENSEREF'
  }
  if (DOCUMENTREF.test(string)) {
    return 'DOCUMENTREF'
  }

  switch (string) {
    case '(':
      return 'OPEN'
    case ')':
      return 'CLOSE'
    case ':':
      return 'COLON'
    case '+':
      return 'PLUS'
  }

  throw new Error('Invalid input at offset ' + start)
}

0 commit comments

Comments
 (0)