Skip to content

Commit 57612da

Browse files
authored
Merge pull request #560 from krassowski/fix-559-regexp-extractors-offset
Fix regular-expression based extractors offset
2 parents 5c82e0c + f379754 commit 57612da

File tree

12 files changed

+296
-47
lines changed

12 files changed

+296
-47
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,12 @@
1010

1111
- prevents throwing a highlights error when adding new cell with <kbd>Shift</kbd> + <kbd>Enter</kbd> ([#544])
1212
- fixes IPython `pinfo` and `pinfo2` (`?` and `??`) for identifiers containing `s` ([#547])
13+
- fixes incorrect behaviour of LSP features in some IPython magics with single line of content ([#560])
1314

1415
[#544]: https://github.com/krassowski/jupyterlab-lsp/pull/544
1516
[#547]: https://github.com/krassowski/jupyterlab-lsp/pull/547
1617
[#553]: https://github.com/krassowski/jupyterlab-lsp/pull/553
18+
[#560]: https://github.com/krassowski/jupyterlab-lsp/pull/560
1719

1820
### `jupyter-lsp 1.1.4` (2020-02-21)
1921

atest/05_Features/Completion.robot

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,28 @@ Shows Documentation With CompletionItem Resolve
296296
Completer Should Include Documentation the default method of the
297297
[Teardown] Clean Up After Working With File completion.R
298298

299+
Shows Only Relevant Suggestions In Known Magics
300+
# https://github.com/krassowski/jupyterlab-lsp/issues/559
301+
# h<tab>
302+
Enter Cell Editor 20 line=2
303+
Trigger Completer
304+
Completer Should Suggest help
305+
Completer Should Not Suggest from
306+
Completer Should Suggest hash
307+
308+
Completes In R Magics
309+
# Proper completion in R magics needs to be tested as:
310+
# - R magic extractor uses a tailor-made replacer function, not tested elsewhere
311+
# - R lanugage server is very sensitive to off-by-one errors (see https://github.com/REditorSupport/languageserver/issues/395)
312+
# '%%R\n librar<tab>'
313+
Enter Cell Editor 22 line=2
314+
Trigger Completer
315+
Completer Should Suggest library
316+
# '%R lib<tab>'
317+
Enter Cell Editor 24 line=1
318+
Trigger Completer
319+
Completer Should Suggest library
320+
299321
*** Keywords ***
300322
Setup Completion Test
301323
Setup Notebook Python Completion.ipynb

atest/examples/Completion.ipynb

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,56 @@
150150
"source": [
151151
"t"
152152
]
153+
},
154+
{
155+
"cell_type": "markdown",
156+
"metadata": {},
157+
"source": [
158+
"Cell magics show only relevant suggestions (triggering after `h` should return `hash`, `help`, etc, but not `from`):"
159+
]
160+
},
161+
{
162+
"cell_type": "code",
163+
"execution_count": null,
164+
"metadata": {},
165+
"outputs": [],
166+
"source": [
167+
"%%python\n",
168+
"h"
169+
]
170+
},
171+
{
172+
"cell_type": "markdown",
173+
"metadata": {},
174+
"source": [
175+
"Test that the leading space does not cause issues in R cell magic:"
176+
]
177+
},
178+
{
179+
"cell_type": "code",
180+
"execution_count": null,
181+
"metadata": {},
182+
"outputs": [],
183+
"source": [
184+
"%%R\n",
185+
" librar"
186+
]
187+
},
188+
{
189+
"cell_type": "markdown",
190+
"metadata": {},
191+
"source": [
192+
"And that R line magic works too:"
193+
]
194+
},
195+
{
196+
"cell_type": "code",
197+
"execution_count": null,
198+
"metadata": {},
199+
"outputs": [],
200+
"source": [
201+
"%R li"
202+
]
153203
}
154204
],
155205
"metadata": {

packages/jupyterlab-lsp/src/extractors/regexp.spec.ts

Lines changed: 50 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
import { expect } from 'chai';
2-
import { RegExpForeignCodeExtractor } from './regexp';
2+
import { getIndexOfCaptureGroup, RegExpForeignCodeExtractor } from './regexp';
33

44
let R_CELL_MAGIC_EXISTS = `%%R
55
some text
66
`;
77

8+
let PYTHON_CELL_MAGIC_WITH_H = `%%python
9+
h`;
10+
811
let NO_CELL_MAGIC = `%R
912
some text
1013
%%R
@@ -23,11 +26,23 @@ x = """<a href="#">
2326
</a>""";
2427
print(x)`;
2528

29+
describe('getIndexOfCaptureGroup', () => {
30+
it('extracts index of a captured group', () => {
31+
// tests for https://github.com/krassowski/jupyterlab-lsp/issues/559
32+
let result = getIndexOfCaptureGroup(
33+
new RegExp('^%%(python|python2|python3|pypy)( .*?)?\n([^]*)'),
34+
'%%python\nh',
35+
'h'
36+
);
37+
expect(result).to.be.equal(9);
38+
});
39+
});
40+
2641
describe('RegExpForeignCodeExtractor', () => {
2742
let r_cell_extractor = new RegExpForeignCodeExtractor({
2843
language: 'R',
2944
pattern: '^%%R( .*?)?\n([^]*)',
30-
extract_to_foreign: '$2',
45+
foreign_capture_groups: [2],
3146
keep_in_host: true,
3247
is_standalone: false,
3348
file_extension: 'R'
@@ -36,12 +51,21 @@ describe('RegExpForeignCodeExtractor', () => {
3651
let r_line_extractor = new RegExpForeignCodeExtractor({
3752
language: 'R',
3853
pattern: '(^|\n)%R (.*)\n?',
39-
extract_to_foreign: '$2',
54+
foreign_capture_groups: [2],
4055
keep_in_host: true,
4156
is_standalone: false,
4257
file_extension: 'R'
4358
});
4459

60+
let python_cell_extractor = new RegExpForeignCodeExtractor({
61+
language: 'python',
62+
pattern: '^%%(python|python2|python3|pypy)( .*?)?\n([^]*)',
63+
foreign_capture_groups: [3],
64+
keep_in_host: true,
65+
is_standalone: true,
66+
file_extension: 'py'
67+
});
68+
4569
describe('#has_foreign_code()', () => {
4670
it('detects cell magics', () => {
4771
let result = r_cell_extractor.has_foreign_code(R_CELL_MAGIC_EXISTS);
@@ -75,16 +99,34 @@ describe('RegExpForeignCodeExtractor', () => {
7599
});
76100

77101
describe('#extract_foreign_code()', () => {
102+
it('should correctly return the range', () => {
103+
let results = python_cell_extractor.extract_foreign_code(
104+
PYTHON_CELL_MAGIC_WITH_H
105+
);
106+
expect(results.length).to.equal(1);
107+
108+
let result = results[0];
109+
110+
// test against https://github.com/krassowski/jupyterlab-lsp/issues/559
111+
expect(result.host_code).to.equal(PYTHON_CELL_MAGIC_WITH_H);
112+
expect(result.foreign_code).to.equal('h');
113+
114+
expect(result.range.start.line).to.equal(1);
115+
expect(result.range.start.column).to.equal(0);
116+
expect(result.range.end.line).to.equal(1);
117+
expect(result.range.end.column).to.equal(1);
118+
});
119+
78120
it('should work with non-line magic and non-cell magic code snippets as well', () => {
79121
// Note: in the real application, one should NOT use regular expressions for HTML extraction
80122

81123
let html_extractor = new RegExpForeignCodeExtractor({
82124
language: 'HTML',
83-
pattern: '<(.*?)( .*?)?>([^]*?)</\\1>',
84-
extract_to_foreign: '<$1$2>$3</$1>',
125+
pattern: '(<(.*?)( .*?)?>([^]*?)</\\2>)',
126+
foreign_capture_groups: [1],
85127
keep_in_host: false,
86128
is_standalone: false,
87-
file_extension: 'R'
129+
file_extension: 'html'
88130
});
89131

90132
let results = html_extractor.extract_foreign_code(HTML_IN_PYTHON);
@@ -118,7 +160,7 @@ describe('RegExpForeignCodeExtractor', () => {
118160
let extractor = new RegExpForeignCodeExtractor({
119161
language: 'R',
120162
pattern: '^%%R( .*?)?\n([^]*)',
121-
extract_to_foreign: '$2',
163+
foreign_capture_groups: [2],
122164
keep_in_host: false,
123165
is_standalone: false,
124166
file_extension: 'R'
@@ -136,7 +178,7 @@ describe('RegExpForeignCodeExtractor', () => {
136178
let r_line_extractor = new RegExpForeignCodeExtractor({
137179
language: 'R',
138180
pattern: '(^|\n)%R (.*)\n?',
139-
extract_to_foreign: '$2',
181+
foreign_capture_groups: [2],
140182
keep_in_host: false,
141183
is_standalone: false,
142184
file_extension: 'R'

packages/jupyterlab-lsp/src/extractors/regexp.ts

Lines changed: 92 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,40 @@ import { position_at_offset } from '../positioning';
33
import { replacer } from '../overrides/tokens';
44
import { CodeEditor } from '@jupyterlab/codeeditor';
55

6+
export function getIndexOfCaptureGroup(
7+
expression: RegExp,
8+
matched_string: string,
9+
value_of_captured_group: string
10+
): number {
11+
// TODO: use https://github.com/tc39/proposal-regexp-match-indices once supported in >95% of browsers
12+
// (probably around 2025)
13+
14+
// get index of the part that is being extracted to foreign document
15+
let captured_groups = expression.exec(matched_string);
16+
let offset_in_match = 0;
17+
18+
// first element is full match
19+
let full_matched = captured_groups[0];
20+
21+
for (let group of captured_groups.slice(1)) {
22+
if (typeof group === 'undefined') {
23+
continue;
24+
}
25+
26+
if (group === value_of_captured_group) {
27+
offset_in_match += full_matched.indexOf(group);
28+
break;
29+
}
30+
31+
let group_end_offset = full_matched.indexOf(group) + group.length;
32+
33+
full_matched = full_matched.slice(group_end_offset);
34+
offset_in_match += group_end_offset;
35+
}
36+
37+
return offset_in_match;
38+
}
39+
640
export class RegExpForeignCodeExtractor implements IForeignCodeExtractor {
741
options: RegExpForeignCodeExtractor.IOptions;
842
language: string;
@@ -37,14 +71,28 @@ export class RegExpForeignCodeExtractor implements IForeignCodeExtractor {
3771
let match: RegExpExecArray = this.global_expression.exec(code);
3872
let host_code_fragment: string;
3973

74+
let chosen_replacer: string | replacer;
75+
let is_new_api_replacer: boolean = false;
76+
77+
if (typeof this.options.foreign_replacer !== 'undefined') {
78+
chosen_replacer = this.options.foreign_replacer;
79+
is_new_api_replacer = true;
80+
} else if (typeof this.options.foreign_capture_groups !== 'undefined') {
81+
chosen_replacer = '$' + this.options.foreign_capture_groups.join('$');
82+
is_new_api_replacer = true;
83+
} else {
84+
chosen_replacer = this.options.extract_to_foreign;
85+
}
86+
4087
while (match != null) {
4188
let matched_string = match[0];
4289
let position_shift: CodeEditor.IPosition = null;
90+
4391
let foreign_code_fragment = matched_string.replace(
4492
this.expression,
4593
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
4694
// @ts-ignore
47-
this.options.extract_to_foreign
95+
chosen_replacer
4896
);
4997
let prefix = '';
5098
if (typeof this.options.extract_arguments !== 'undefined') {
@@ -72,11 +120,23 @@ export class RegExpForeignCodeExtractor implements IForeignCodeExtractor {
72120
}
73121
}
74122

75-
// TODO: this could be slightly optimized (start at start) by using the match[n],
76-
// where n is the group to be used; while this reduces the flexibility of extract_to_foreign,
77-
// it might be better to enforce such strict requirement
78-
let start_offset =
79-
match.index + matched_string.indexOf(foreign_code_fragment);
123+
let foreign_code_group_value = foreign_code_fragment;
124+
125+
if (is_new_api_replacer) {
126+
foreign_code_group_value = matched_string.replace(
127+
this.expression,
128+
'$' + Math.min(...this.options.foreign_capture_groups)
129+
);
130+
}
131+
132+
const foreign_group_index_in_match = getIndexOfCaptureGroup(
133+
this.expression,
134+
matched_string,
135+
foreign_code_group_value
136+
);
137+
138+
let start_offset = match.index + foreign_group_index_in_match;
139+
80140
let start = position_at_offset(start_offset, lines);
81141
let end = position_at_offset(
82142
start_offset + foreign_code_fragment.length,
@@ -118,16 +178,35 @@ namespace RegExpForeignCodeExtractor {
118178
* String giving regular expression to test cells for the foreign language presence.
119179
*
120180
* For example:
121-
* - %%R( (.*))?\n(.*) will match R cells of rpy2
122-
* - (.*)'<html>(.*)</html>'(.*) will match html documents in strings of any language using single ticks
181+
* - `%%R( (.*))?\n(.*)` will match R cells of rpy2
182+
* - `(.*)'<html>(.*)</html>'(.*)` will match html documents in strings of any language using single ticks
123183
*/
124184
pattern: string;
125185
/**
126-
* String specifying match groups to be extracted from the regular expression match,
186+
* Array of numbers specifying match groups to be extracted from the regular expression match,
127187
* for the use in virtual document of the foreign language.
128-
* For the R example this should be '$3'
188+
* For the R example this should be `3`. Please not that these are 1-based, as the 0th index is the full match.
189+
* If multiple groups are given, those will be concatenated.
190+
*
191+
* If additional code is needed in between the groups, use `foreign_replacer` in addition to
192+
* `foreign_capture_groups` (but not instead!).
193+
*
194+
* `foreign_capture_groups` is required for proper offset calculation and will no longer be optional in 4.0.
195+
*/
196+
foreign_capture_groups?: number[];
197+
/**
198+
* Function to compose the foreign document code, in case if using a capture group alone is not sufficient;
199+
* If specified, `foreign_capture_group` should be specified as well, so that it points to the first occurrence
200+
* of the foreign code. When both are specified, `foreign_replacer` takes precedence.
201+
*
202+
* See:
203+
* https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace#specifying_a_function_as_a_parameter
129204
*/
130-
extract_to_foreign: string | replacer;
205+
foreign_replacer?: replacer;
206+
/**
207+
* @deprecated `extract_to_foreign` will be removed in 4.0; use `foreign_capture_group` or `foreign_replacer` instead
208+
*/
209+
extract_to_foreign?: string | replacer;
131210
/**
132211
* If arguments from the cell or line magic are to be extracted and prepended before the extracted code,
133212
* set extract_arguments to a replacer function taking the code and returning the string to be prepended.
@@ -143,6 +222,8 @@ namespace RegExpForeignCodeExtractor {
143222
*
144223
* Setting to false is DEPRECATED as it breaks the edit feature (while it could be fixed,
145224
* it would make the code considerably more complex).
225+
*
226+
* @deprecated `keep_in_host` will be removed in 4.0
146227
*/
147228
keep_in_host?: boolean;
148229
/**

packages/jupyterlab-lsp/src/extractors/testutils.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,14 @@ export function extract_code(document: VirtualDocument, code: string) {
1212
);
1313
}
1414

15+
interface IDocumentWithRange {
16+
range: CodeEditor.IRange;
17+
virtual_document: VirtualDocument;
18+
}
19+
1520
export function get_the_only_pair(
1621
foreign_document_map: Map<CodeEditor.IRange, IVirtualDocumentBlock>
17-
) {
22+
): IDocumentWithRange {
1823
expect(foreign_document_map.size).to.equal(1);
1924

2025
let range = foreign_document_map.keys().next().value;

packages/jupyterlab-lsp/src/transclusions/ipython-bigquery/extractors.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ export let foreign_code_extractors: IForeignCodeExtractorsRegistry = {
2626
python: [
2727
new RegExpForeignCodeExtractor({
2828
language: 'sql',
29-
pattern: `^%%bigquery(?: (?:${SQL_URL_PATTERN}|${COMMAND_PATTERN}|(?:\\w+ << )|(?:\\w+@\\w+)))?\n?(.+\n)?([^]*)`,
30-
extract_to_foreign: '$1$2',
29+
pattern: `^%%bigquery(?: (?:${SQL_URL_PATTERN}|${COMMAND_PATTERN}|(?:\\w+ << )|(?:\\w+@\\w+)))?\n?((?:.+\n)?(?:[^]*))`,
30+
foreign_capture_groups: [1],
3131
is_standalone: true,
3232
file_extension: 'sql'
3333
})

0 commit comments

Comments
 (0)