@@ -3,6 +3,38 @@ import { position_at_offset } from '../positioning';
3
3
import { replacer } from '../overrides/tokens' ;
4
4
import { CodeEditor } from '@jupyterlab/codeeditor' ;
5
5
6
+
7
/**
 * Find the offset, relative to the start of the match, of the capture group
 * whose value equals `value_of_captured_group`.
 *
 * The expression is executed against `matched_string` and its capture groups are
 * walked in order. Each group is searched for only in the part of the match that
 * was not consumed by the preceding groups, so that an identical substring
 * occurring earlier in the match is not mistaken for the group.
 *
 * Groups which did not participate in the match (`undefined`) and groups which
 * cannot be found in the remaining part of the match (e.g. groups nested inside
 * of an already consumed group) are skipped and do not influence the offset.
 *
 * @param expression - regular expression with capture groups
 * @param matched_string - the string that the expression is executed against
 * @param value_of_captured_group - value of the capture group to locate
 * @returns the offset of the first located group with the requested value;
 *   `0` if the expression does not match `matched_string` at all; the end offset
 *   of the last located group if no group has the requested value.
 */
export function getIndexOfCaptureGroup(
  expression: RegExp,
  matched_string: string,
  value_of_captured_group: string
): number {
  // TODO: use https://github.com/tc39/proposal-regexp-match-indices once supported in >95% of browsers
  //  (probably around 2025)

  // get index of the part that is being extracted to foreign document
  const captured_groups = expression.exec(matched_string);

  // `exec` returns null when there is no match; there is no group to locate then
  if (captured_groups === null) {
    return 0;
  }

  let offset_in_match = 0;

  // first element is full match; it gets shortened as the groups are consumed
  let remaining_match = captured_groups[0];

  for (const group of captured_groups.slice(1)) {
    // optional groups which did not participate in the match
    if (typeof group === 'undefined') {
      continue;
    }

    const group_index = remaining_match.indexOf(group);

    // the group lies within an already consumed part of the match (e.g. it is
    // nested in a previous group); using -1 in the arithmetic below would
    // corrupt both the offset and the remaining part of the match
    if (group_index === -1) {
      continue;
    }

    if (group === value_of_captured_group) {
      return offset_in_match + group_index;
    }

    const group_end_offset = group_index + group.length;

    remaining_match = remaining_match.slice(group_end_offset);
    offset_in_match += group_end_offset;
  }

  return offset_in_match;
}
37
+
6
38
export class RegExpForeignCodeExtractor implements IForeignCodeExtractor {
7
39
options : RegExpForeignCodeExtractor . IOptions ;
8
40
language : string ;
@@ -37,14 +69,18 @@ export class RegExpForeignCodeExtractor implements IForeignCodeExtractor {
37
69
let match : RegExpExecArray = this . global_expression . exec ( code ) ;
38
70
let host_code_fragment : string ;
39
71
72
+ let new_api_replacer = typeof this . options . foreign_replacer !== 'undefined' ? this . options . foreign_replacer : ( '$' + this . options . foreign_capture_group ) ;
73
+ const replacer = typeof this . options . extract_to_foreign !== 'undefined' ? this . options . extract_to_foreign : new_api_replacer ;
74
+
40
75
while ( match != null ) {
41
76
let matched_string = match [ 0 ] ;
42
77
let position_shift : CodeEditor . IPosition = null ;
78
+
43
79
let foreign_code_fragment = matched_string . replace (
44
80
this . expression ,
45
81
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
46
82
// @ts -ignore
47
- this . options . extract_to_foreign
83
+ replacer
48
84
) ;
49
85
let prefix = '' ;
50
86
if ( typeof this . options . extract_arguments !== 'undefined' ) {
@@ -72,11 +108,22 @@ export class RegExpForeignCodeExtractor implements IForeignCodeExtractor {
72
108
}
73
109
}
74
110
75
- // TODO: this could be slightly optimized (start at start) by using the match[n],
76
- // where n is the group to be used; while this reduces the flexibility of extract_to_foreign,
77
- // it might be better to enforce such strict requirement
111
+ let foreign_code_group_value = foreign_code_fragment ;
112
+
113
+ if ( new_api_replacer ) {
114
+ foreign_code_group_value = matched_string . replace (
115
+ this . expression ,
116
+ '$' + this . options . foreign_capture_group
117
+ ) ;
118
+ }
119
+
120
+ const foreign_group_index_in_match = getIndexOfCaptureGroup (
121
+ this . expression , matched_string , foreign_code_group_value
122
+ ) ;
123
+
78
124
let start_offset =
79
- match . index + matched_string . indexOf ( foreign_code_fragment ) ;
125
+ match . index + foreign_group_index_in_match ;
126
+
80
127
let start = position_at_offset ( start_offset , lines ) ;
81
128
let end = position_at_offset (
82
129
start_offset + foreign_code_fragment . length ,
@@ -118,16 +165,36 @@ namespace RegExpForeignCodeExtractor {
118
165
* String giving regular expression to test cells for the foreign language presence.
119
166
*
120
167
* For example:
121
- * - %%R( (.*))?\n(.*) will match R cells of rpy2
122
- * - (.*)'<html>(.*)</html>'(.*) will match html documents in strings of any language using single ticks
168
+ * - ` %%R( (.*))?\n(.*)` will match R cells of rpy2
169
+ * - ` (.*)'<html>(.*)</html>'(.*)` will match html documents in strings of any language using single ticks
123
170
*/
124
171
pattern : string ;
125
172
/**
126
173
* String specifying match groups to be extracted from the regular expression match,
127
174
* for the use in virtual document of the foreign language.
128
- * For the R example this should be '$3'
175
+ * For the R example this should be `3`. Please note that these are 1-based, as the 0th index is the full match.
176
+ *
177
+ * If more than one capture group is needed to extract the code (which is rarely the case:
178
+ * usually one can use non-capturing groups rather than multiple adjacent capturing groups),
179
+ * specify the first capturing group to allow for proper calculation of the start offset,
180
+ * and handle any additional groups using `foreign_replacer`.
181
+ *
182
+ * `foreign_capture_group` is required for proper offset calculation and will no longer be optional in 4.0.
183
+ */
184
+ foreign_capture_group ?: number ;
185
+ /**
186
+ * Function to compose the foreign document code, in case using a capture group alone is not sufficient.
187
+ * If specified, `foreign_capture_group` should be specified as well, so that it points to the first occurrence
188
+ * of the foreign code. When both are specified, `foreign_replacer` takes precedence.
189
+ *
190
+ * See:
191
+ * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace#specifying_a_function_as_a_parameter
192
+ */
193
+ foreign_replacer ?: replacer ;
194
+ /**
195
+ * @deprecated `extract_to_foreign` will be removed in 4.0; use `foreign_capture_group` or `foreign_replacer` instead
129
196
*/
130
- extract_to_foreign : string | replacer ;
197
+ extract_to_foreign ? : string | replacer ;
131
198
/**
132
199
* If arguments from the cell or line magic are to be extracted and prepended before the extracted code,
133
200
* set extract_arguments to a replacer function taking the code and returning the string to be prepended.
@@ -143,6 +210,8 @@ namespace RegExpForeignCodeExtractor {
143
210
*
144
211
* Setting to false is DEPRECATED as it breaks the edit feature (while it could be fixed,
145
212
* it would make the code considerably more complex).
213
+ *
214
+ * @deprecated `keep_in_host` will be removed in 4.0
146
215
*/
147
216
keep_in_host ?: boolean ;
148
217
/**
0 commit comments