Skip to content

Commit d2834c8

Browse files
authored
✨[RUM-10962][Remote config] support js strategy (#3766)
* ♻️ factorize extractor * ✨add simple JSON path parser * ✨add js strategy support * ⬆️ synchronize remote configuration schema * 👌remove unneeded cast * 👌add extra comments * 👌replace sets by lists * 👌use String.raw to avoid quote juggling * 👌some renamings * 👌remove useless types * 👌rework namings * 🐛fix escaping logic
1 parent 65691c2 commit d2834c8

File tree

7 files changed

+562
-64
lines changed

7 files changed

+562
-64
lines changed
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import { parseJsonPath } from './jsonPathParser'
2+
3+
describe('parseJsonPath', () => {
  // every segment is a name shorthand separated by dots
  it('should extract selectors from dot notation', () => {
    expect(parseJsonPath('a')).toEqual(['a'])
    expect(parseJsonPath('foo.bar')).toEqual(['foo', 'bar'])
    expect(parseJsonPath('foo.bar.qux')).toEqual(['foo', 'bar', 'qux'])
  })

  // every segment is a quoted name selector, with single or double quotes
  it('should extract selectors from bracket notation', () => {
    expect(parseJsonPath(String.raw`['a']`)).toEqual(['a'])
    expect(parseJsonPath(String.raw`["a"]`)).toEqual(['a'])
    expect(parseJsonPath(String.raw`['foo']["bar"]`)).toEqual(['foo', 'bar'])
    expect(parseJsonPath(String.raw`['foo']["bar"]['qux']`)).toEqual(['foo', 'bar', 'qux'])
  })

  it('should extract selectors from mixed notations', () => {
    expect(parseJsonPath(String.raw`['foo'].bar['qux']`)).toEqual(['foo', 'bar', 'qux'])
  })

  // index selectors are returned as strings, like name selectors
  it('should extract name and index selectors', () => {
    expect(parseJsonPath('[0]')).toEqual(['0'])
    expect(parseJsonPath('foo[12]')).toEqual(['foo', '12'])
    expect(parseJsonPath(String.raw`['foo'][12]`)).toEqual(['foo', '12'])
  })

  // String.raw keeps the backslashes in the path, so the parser is the one decoding them
  it('should extract name selectors replacing escaped sequence by equivalent character', () => {
    expect(parseJsonPath(String.raw`['foo\n']`)).toEqual(['foo\n'])
    expect(parseJsonPath(String.raw`['foo\b']`)).toEqual(['foo\b'])
    expect(parseJsonPath(String.raw`['foo\t']`)).toEqual(['foo\t'])
    expect(parseJsonPath(String.raw`['foo\f']`)).toEqual(['foo\f'])
    expect(parseJsonPath(String.raw`['foo\r']`)).toEqual(['foo\r'])
    expect(parseJsonPath(String.raw`["foo\u03A9"]`)).toEqual(['fooΩ'])
    expect(parseJsonPath(String.raw`["\u03A9A"]`)).toEqual(['ΩA'])
    expect(parseJsonPath(String.raw`["\t\u03A9\n"]`)).toEqual(['\tΩ\n'])
    expect(parseJsonPath(String.raw`['foo\'']`)).toEqual([String.raw`foo'`])
    expect(parseJsonPath(String.raw`["foo\""]`)).toEqual([String.raw`foo"`])
    expect(parseJsonPath(String.raw`["foo\/"]`)).toEqual([String.raw`foo/`])
  })

  it('should extract name selectors containing characters not supported in name shorthands', () => {
    expect(parseJsonPath(String.raw`['foo[]']`)).toEqual([String.raw`foo[]`])
    expect(parseJsonPath(String.raw`['foo.']`)).toEqual([String.raw`foo.`])
  })

  it('should return an empty array for an invalid path', () => {
    // misplaced dots
    expect(parseJsonPath('.foo')).toEqual([])
    expect(parseJsonPath('.')).toEqual([])
    expect(parseJsonPath('foo.')).toEqual([])
    expect(parseJsonPath('foo..bar')).toEqual([])
    // unbalanced brackets or quotes
    expect(parseJsonPath('[1')).toEqual([])
    expect(parseJsonPath('foo]')).toEqual([])
    expect(parseJsonPath(String.raw`[['foo']`)).toEqual([])
    expect(parseJsonPath(String.raw`['foo'`)).toEqual([])
    expect(parseJsonPath(String.raw`['foo]`)).toEqual([])
    expect(parseJsonPath(String.raw`[foo']`)).toEqual([])
    expect(parseJsonPath(String.raw`['foo''bar']`)).toEqual([])
    // unknown or incomplete escape sequences
    expect(parseJsonPath(String.raw`['foo\o']`)).toEqual([])
    expect(parseJsonPath(String.raw`["\u03Z9"]`)).toEqual([])
    expect(parseJsonPath(String.raw`['foo\u12']`)).toEqual([])
    // trailing characters and mismatched quotes
    expect(parseJsonPath(String.raw`['foo']a`)).toEqual([])
    expect(parseJsonPath(String.raw`["foo']`)).toEqual([])
  })
})
Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
/**
2+
* Terminology inspired from https://www.rfc-editor.org/rfc/rfc9535.html
3+
*
4+
* jsonpath-query = segment*
5+
* segment = .name-shorthand / bracketed-selection
6+
* bracketed-selection = ['name-selector'] / ["name-selector"] / [index-selector]
7+
*
8+
* Useful references:
9+
* - https://goessner.net/articles/JsonPath/
10+
* - https://jsonpath.com/
11+
* - https://github.com/jsonpath-standard
12+
*/
13+
14+
/**
 * Mutable state shared between the parsing loop and the token predicates.
 */
interface ParsingContext {
  /** Quote character that opened the name selector being read, `undefined` outside of quotes. */
  quote: string | undefined
  /** Characters read so far after a backslash (backslash excluded), `undefined` when no escape sequence is pending. */
  escapeSequence: string | undefined
}
18+
19+
/**
 * Extract selectors from a simple JSON path expression, return [] for an invalid path
 *
 * The path is read one character at a time. Each character is classified as the first
 * token, among the ones allowed after the previous token, whose predicate accepts it.
 * A character that matches none of them makes the whole path invalid.
 *
 * Supports:
 * - Dot notation: `foo.bar.baz`
 * - Bracket notation: `['foo']["bar"]`, including the empty name `['']`
 * - Array indices: `items[0]`, `data['users'][1]`
 *
 * Examples:
 * parseJsonPath("['foo'].bar[12]")
 * => ['foo', 'bar', '12']
 *
 * parseJsonPath("['foo")
 * => []
 *
 * @param path - JSON path expression to parse
 * @returns the selectors in path order (index selectors are returned as strings),
 * or an empty array when the path is empty or invalid
 */
export function parseJsonPath(path: string): string[] {
  const selectors: string[] = []
  const parsingContext: ParsingContext = { quote: undefined, escapeSequence: undefined }
  let previousToken = Token.START
  let currentSelector = ''

  for (const char of path) {
    // find which kind of token is this char
    const currentToken = ALLOWED_NEXT_TOKENS[previousToken].find((token) =>
      TOKEN_PREDICATE[token](char, parsingContext)
    )
    // compare with undefined explicitly: tokens are numeric and the first one is 0
    if (currentToken === undefined) {
      return []
    }

    // a pending escape sequence ends as soon as the current char is not part of it:
    // validate it and append the character it stands for before handling the current char
    if (parsingContext.escapeSequence !== undefined && currentToken !== Token.ESCAPE_SEQUENCE_CHAR) {
      if (!isValidEscapeSequence(parsingContext.escapeSequence)) {
        return []
      }
      currentSelector += resolveEscapeSequence(parsingContext.escapeSequence)
      parsingContext.escapeSequence = undefined
    }

    if (ALLOWED_SELECTOR_TOKENS.includes(currentToken)) {
      // buffer the char if it belongs to the selector
      // ex: the `foo` and `bar` chars of foo['bar']
      currentSelector += char
    } else if (ALLOWED_SELECTOR_DELIMITER_TOKENS.includes(currentToken) && currentSelector !== '') {
      // close the current path part if we have reached a path part delimiter
      // ex: the `.`, `[` and `]` chars of foo.bar['qux']
      selectors.push(currentSelector)
      currentSelector = ''
    } else if (currentToken === Token.ESCAPE_SEQUENCE_CHAR) {
      parsingContext.escapeSequence = parsingContext.escapeSequence ? `${parsingContext.escapeSequence}${char}` : char
    } else if (currentToken === Token.QUOTE_START) {
      parsingContext.quote = char
    } else if (currentToken === Token.QUOTE_END) {
      // a quoted name selector is complete once its quote closes: emit it here rather than
      // on the closing bracket so that an empty name (`['']`) is kept instead of being dropped
      selectors.push(currentSelector)
      currentSelector = ''
      parsingContext.quote = undefined
    }
    previousToken = currentToken
  }

  // the path must stop on a token that is allowed to be the last one
  // (an empty path stops on START, which is not)
  if (!ALLOWED_NEXT_TOKENS[previousToken].includes(Token.END)) {
    return []
  }
  // a trailing name shorthand has no delimiter after it to close it
  if (currentSelector !== '') {
    selectors.push(currentSelector)
  }
  return selectors
}
81+
82+
/**
83+
* List of all tokens in the path
84+
*
85+
* @example foo.bar['qu\'x'][0]
86+
* | | | | |
87+
* Token sequence: | | | | |
88+
* 1. START (before first char) <-+ | | | |
89+
* 2. NAME_SHORTHAND_FIRST_CHAR: f | | | |
90+
* 3. NAME_SHORTHAND_CHAR: oo | | | |
91+
* 4. DOT: . <------------------------+ | | |
92+
* 5. NAME_SHORTHAND_FIRST_CHAR: b | | |
93+
* 6. NAME_SHORTHAND_CHAR: ar | | |
94+
* 7. BRACKET_START: [ <------------------+ | |
95+
* 8. QUOTE_START: ' | |
96+
* 9. NAME_SELECTOR_CHAR: qu | |
97+
* 10. ESCAPE: \ | |
98+
* 11. ESCAPE_SEQUENCE_CHAR: ' | |
99+
* 12. NAME_SELECTOR_CHAR: x | |
100+
* 13. QUOTE_END: ' | |
101+
* 14. BRACKET_END: ] | |
102+
* 15. BRACKET_START: [ <--------------------------+ |
103+
* 16. DIGIT: 0 |
104+
* 17. BRACKET_END: ] |
105+
* 18. END (after last char) <------------------------+
106+
*/
107+
const enum Token {
  // virtual tokens standing for the positions before the first char and after the last one
  START = 0,
  END = 1,

  // dot notation
  NAME_SHORTHAND_FIRST_CHAR = 2,
  NAME_SHORTHAND_CHAR = 3,
  DOT = 4,

  // bracket notation and index selectors
  BRACKET_START = 5,
  BRACKET_END = 6,
  DIGIT = 7,

  // quoted name selectors
  QUOTE_START = 8,
  QUOTE_END = 9,
  NAME_SELECTOR_CHAR = 10,
  ESCAPE = 11,
  ESCAPE_SEQUENCE_CHAR = 12,
}
125+
126+
// first char of a name shorthand: an ASCII letter, `_` or `$`
const NAME_SHORTHAND_FIRST_CHAR_REGEX = /[a-zA-Z_$]/
// following chars of a name shorthand: same as the first one, plus digits
const NAME_SHORTHAND_CHAR_REGEX = /[a-zA-Z0-9_$]/
// char of an index selector
const DIGIT_REGEX = /[0-9]/
// hexadecimal digit, as used after `u` in an escape sequence
const UNICODE_CHAR_REGEX = /[a-fA-F0-9]/
// chars that can open a quoted name selector
const QUOTE_CHARS = '\'"'
131+
132+
// For each token, tells whether a char can be read as this token given the parsing state.
const TOKEN_PREDICATE: Record<Token, (char: string, parsingContext: ParsingContext) => boolean> = {
  // START and END are positions, not chars: nothing matches them
  [Token.START]: () => false,
  [Token.END]: () => false,

  [Token.NAME_SHORTHAND_FIRST_CHAR]: (char) => NAME_SHORTHAND_FIRST_CHAR_REGEX.test(char),
  [Token.NAME_SHORTHAND_CHAR]: (char) => NAME_SHORTHAND_CHAR_REGEX.test(char),
  [Token.DOT]: (char) => char === '.',

  [Token.BRACKET_START]: (char) => char === '[',
  [Token.BRACKET_END]: (char) => char === ']',
  [Token.DIGIT]: (char) => DIGIT_REGEX.test(char),

  [Token.QUOTE_START]: (char) => QUOTE_CHARS.includes(char),
  // only the quote char that opened the selector can close it
  [Token.QUOTE_END]: (char, { quote }) => char === quote,
  // any char can be used in name selector
  [Token.NAME_SELECTOR_CHAR]: () => true,
  [Token.ESCAPE]: (char) => char === '\\',
  [Token.ESCAPE_SEQUENCE_CHAR]: (char, { quote, escapeSequence }) => {
    if (escapeSequence === undefined) {
      // first char after the backslash: the current quote or one of the escapable chars
      // see https://www.rfc-editor.org/rfc/rfc9535.html#name-semantics-3
      return `${quote}/\\bfnrtu`.includes(char)
    }
    // following chars are only accepted while a `u` sequence still lacks some of its 4 hex digits
    const isIncompleteUnicodeSequence = escapeSequence.startsWith('u') && escapeSequence.length < 5
    return isIncompleteUnicodeSequence && UNICODE_CHAR_REGEX.test(char)
  },
}
159+
160+
// For each token, the tokens that may follow it. Candidates are tried in order and the first
// one whose predicate accepts the char wins, so the order inside each list matters.
const ALLOWED_NEXT_TOKENS: Record<Token, Token[]> = {
  [Token.START]: [Token.NAME_SHORTHAND_FIRST_CHAR, Token.BRACKET_START],
  [Token.END]: [],

  // dot notation
  [Token.NAME_SHORTHAND_FIRST_CHAR]: [Token.NAME_SHORTHAND_CHAR, Token.DOT, Token.BRACKET_START, Token.END],
  [Token.NAME_SHORTHAND_CHAR]: [Token.NAME_SHORTHAND_CHAR, Token.DOT, Token.BRACKET_START, Token.END],
  [Token.DOT]: [Token.NAME_SHORTHAND_FIRST_CHAR],

  // bracket notation and index selectors
  [Token.BRACKET_START]: [Token.QUOTE_START, Token.DIGIT],
  [Token.BRACKET_END]: [Token.DOT, Token.BRACKET_START, Token.END],
  [Token.DIGIT]: [Token.DIGIT, Token.BRACKET_END],

  // quoted name selectors: NAME_SELECTOR_CHAR accepts any char, so it must stay last
  [Token.QUOTE_START]: [Token.ESCAPE, Token.QUOTE_END, Token.NAME_SELECTOR_CHAR],
  [Token.QUOTE_END]: [Token.BRACKET_END],
  [Token.NAME_SELECTOR_CHAR]: [Token.ESCAPE, Token.QUOTE_END, Token.NAME_SELECTOR_CHAR],
  [Token.ESCAPE]: [Token.ESCAPE_SEQUENCE_CHAR],
  [Token.ESCAPE_SEQUENCE_CHAR]: [Token.ESCAPE_SEQUENCE_CHAR, Token.ESCAPE, Token.QUOTE_END, Token.NAME_SELECTOR_CHAR],
}
178+
179+
// Tokens whose char is copied as is into the selector being built.
// ex: in foo['bar\n'][12], the chars of `foo`, `bar` and `12`
const ALLOWED_SELECTOR_TOKENS: Token[] = [
  Token.NAME_SHORTHAND_FIRST_CHAR,
  Token.NAME_SHORTHAND_CHAR,
  Token.DIGIT,
  Token.NAME_SELECTOR_CHAR,
]
187+
188+
// Tokens that close the selector being built.
// ex: in foo.bar['qux'], the `.`, `[` and `]` chars
const ALLOWED_SELECTOR_DELIMITER_TOKENS: Token[] = [
  Token.DOT,
  Token.BRACKET_START,
  Token.BRACKET_END,
]
191+
192+
/**
 * Tell whether an escape sequence (the chars following a backslash, backslash excluded) is complete
 * and well-formed.
 *
 * A valid sequence is either exactly one escapable char (a quote, `/`, `\`, `b`, `f`, `n`, `r` or `t`)
 * or `u` followed by exactly 4 hexadecimal digits.
 *
 * @param escapeSequence - chars read after the backslash
 * @returns `true` when the sequence can be resolved to a character
 */
function isValidEscapeSequence(escapeSequence: string): boolean {
  if (escapeSequence.length === 1) {
    // `includes` matches substrings, hence the length guard: '' and multi-char strings are not escapable chars
    return '"\'/\\bfnrt'.includes(escapeSequence)
  }
  return /^u[a-fA-F0-9]{4}$/.test(escapeSequence)
}
195+
196+
// Character each single-char escape sequence stands for, keyed by the char following the backslash.
const ESCAPED_CHARS: Record<string, string> = {
  // chars standing for themselves
  '"': '"',
  "'": "'",
  '/': '/',
  '\\': '\\',
  // control chars
  b: '\b',
  f: '\f',
  n: '\n',
  r: '\r',
  t: '\t',
}
207+
208+
/**
 * Return the character an escape sequence stands for.
 *
 * @param escapeSequence - chars read after the backslash (backslash excluded)
 * @returns the character built from the hexadecimal code for a `u` sequence, the `ESCAPED_CHARS` entry otherwise
 */
function resolveEscapeSequence(escapeSequence: string): string {
  if (!escapeSequence.startsWith('u')) {
    return ESCAPED_CHARS[escapeSequence]
  }
  // build Unicode char from code
  const charCode = parseInt(escapeSequence.slice(1), 16)
  return String.fromCharCode(charCode)
}

0 commit comments

Comments
 (0)