@@ -3,6 +3,40 @@ import { position_at_offset } from '../positioning';
33import { replacer } from '../overrides/tokens' ;
44import { CodeEditor } from '@jupyterlab/codeeditor' ;
55
/**
 * Find where a capture group starts within the text matched by `expression`.
 *
 * JavaScript only reports the values of capture groups, not their positions,
 * so the groups are located heuristically: each captured value is searched for
 * in the full match, starting after the end of the previously located group.
 * A group that cannot be found there is assumed to be nested in the previous
 * group and is searched for again from the start of that group.
 *
 * Note that `exec` honours (and updates) `lastIndex` when `expression` has
 * the global or sticky flag.
 *
 * @param expression - regular expression executed against `matched_string`
 * @param matched_string - the string to execute the expression on
 * @param value_of_captured_group - value of the capture group to be located
 * @returns zero-based offset of the first capture group whose value equals
 *   `value_of_captured_group`, relative to the start of the full match;
 *   if no group has this value, the offset just past the last located group
 * @throws TypeError if `expression` does not match `matched_string`
 */
export function getIndexOfCaptureGroup(
  expression: RegExp,
  matched_string: string,
  value_of_captured_group: string
): number {
  // TODO: use https://github.com/tc39/proposal-regexp-match-indices once supported in >95% of browsers
  //  (probably around 2025)

  const captured_groups = expression.exec(matched_string);
  if (captured_groups === null) {
    throw new TypeError(
      'Cannot locate capture group: the expression does not match the given string'
    );
  }

  // first element is full match
  const full_matched = captured_groups[0];
  // offset (in the full match) just past the furthest-reaching located group
  let search_from = 0;
  // offset (in the full match) at which the most recently located group starts
  let previous_group_start = 0;

  for (const group of captured_groups.slice(1)) {
    // groups which did not participate in the match (e.g. optional ones)
    if (typeof group === 'undefined') {
      continue;
    }

    let group_start = full_matched.indexOf(group, search_from);
    if (group_start === -1) {
      // not found after the previous group: probably nested inside of it
      group_start = full_matched.indexOf(group, previous_group_start);
    }
    if (group_start === -1) {
      // cannot be located (e.g. captured by a lookaround outside of the full
      // match); skip it rather than let the -1 corrupt the offsets
      continue;
    }

    if (group === value_of_captured_group) {
      return group_start;
    }

    previous_group_start = group_start;
    search_from = Math.max(search_from, group_start + group.length);
  }

  return search_from;
}
39+
640export class RegExpForeignCodeExtractor implements IForeignCodeExtractor {
741 options : RegExpForeignCodeExtractor . IOptions ;
842 language : string ;
@@ -37,14 +71,28 @@ export class RegExpForeignCodeExtractor implements IForeignCodeExtractor {
3771 let match : RegExpExecArray = this . global_expression . exec ( code ) ;
3872 let host_code_fragment : string ;
3973
74+ let chosen_replacer : string | replacer ;
75+ let is_new_api_replacer : boolean = false ;
76+
77+ if ( typeof this . options . foreign_replacer !== 'undefined' ) {
78+ chosen_replacer = this . options . foreign_replacer ;
79+ is_new_api_replacer = true ;
80+ } else if ( typeof this . options . foreign_capture_groups !== 'undefined' ) {
81+ chosen_replacer = '$' + this . options . foreign_capture_groups . join ( '$' ) ;
82+ is_new_api_replacer = true ;
83+ } else {
84+ chosen_replacer = this . options . extract_to_foreign ;
85+ }
86+
4087 while ( match != null ) {
4188 let matched_string = match [ 0 ] ;
4289 let position_shift : CodeEditor . IPosition = null ;
90+
4391 let foreign_code_fragment = matched_string . replace (
4492 this . expression ,
4593 // eslint-disable-next-line @typescript-eslint/ban-ts-comment
4694 // @ts -ignore
47- this . options . extract_to_foreign
95+ chosen_replacer
4896 ) ;
4997 let prefix = '' ;
5098 if ( typeof this . options . extract_arguments !== 'undefined' ) {
@@ -72,11 +120,23 @@ export class RegExpForeignCodeExtractor implements IForeignCodeExtractor {
72120 }
73121 }
74122
75- // TODO: this could be slightly optimized (start at start) by using the match[n],
76- // where n is the group to be used; while this reduces the flexibility of extract_to_foreign,
77- // it might be better to enforce such strict requirement
78- let start_offset =
79- match . index + matched_string . indexOf ( foreign_code_fragment ) ;
123+ let foreign_code_group_value = foreign_code_fragment ;
124+
125+ if ( is_new_api_replacer ) {
126+ foreign_code_group_value = matched_string . replace (
127+ this . expression ,
128+ '$' + Math . min ( ...this . options . foreign_capture_groups )
129+ ) ;
130+ }
131+
132+ const foreign_group_index_in_match = getIndexOfCaptureGroup (
133+ this . expression ,
134+ matched_string ,
135+ foreign_code_group_value
136+ ) ;
137+
138+ let start_offset = match . index + foreign_group_index_in_match ;
139+
80140 let start = position_at_offset ( start_offset , lines ) ;
81141 let end = position_at_offset (
82142 start_offset + foreign_code_fragment . length ,
@@ -118,16 +178,35 @@ namespace RegExpForeignCodeExtractor {
118178 * String giving regular expression to test cells for the foreign language presence.
119179 *
120180 * For example:
121- * - %%R( (.*))?\n(.*) will match R cells of rpy2
122- * - (.*)'<html>(.*)</html>'(.*) will match html documents in strings of any language using single ticks
181+ * - ` %%R( (.*))?\n(.*)` will match R cells of rpy2
182+ * - ` (.*)'<html>(.*)</html>'(.*)` will match html documents in strings of any language using single ticks
123183 */
124184 pattern : string ;
125185 /**
126- * String specifying match groups to be extracted from the regular expression match,
186+ * Array of numbers specifying match groups to be extracted from the regular expression match,
127187 * for the use in virtual document of the foreign language.
128- * For the R example this should be '$3'
188+ * For the R example this should be `3`. Please note that these are 1-based, as the 0th index is the full match.
189+ * If multiple groups are given, those will be concatenated.
190+ *
191+ * If additional code is needed in between the groups, use `foreign_replacer` in addition to
192+ * `foreign_capture_groups` (but not instead!).
193+ *
194+ * `foreign_capture_groups` is required for proper offset calculation and will no longer be optional in 4.0.
195+ */
196+ foreign_capture_groups ?: number [ ] ;
197+ /**
198+ * Function to compose the foreign document code, in case using a capture group alone is not sufficient.
199+ * If specified, `foreign_capture_groups` should be specified as well, so that it points to the first occurrence
200+ * of the foreign code. When both are specified, `foreign_replacer` takes precedence.
201+ *
202+ * See:
203+ * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace#specifying_a_function_as_a_parameter
129204 */
130- extract_to_foreign : string | replacer ;
205+ foreign_replacer ?: replacer ;
206+ /**
207+ * @deprecated `extract_to_foreign` will be removed in 4.0; use `foreign_capture_groups` or `foreign_replacer` instead
208+ */
209+ extract_to_foreign ?: string | replacer ;
131210 /**
132211 * If arguments from the cell or line magic are to be extracted and prepended before the extracted code,
133212 * set extract_arguments to a replacer function taking the code and returning the string to be prepended.
@@ -143,6 +222,8 @@ namespace RegExpForeignCodeExtractor {
143222 *
144223 * Setting to false is DEPRECATED as it breaks the edit feature (while it could be fixed,
145224 * it would make the code considerably more complex).
225+ *
226+ * @deprecated `keep_in_host` will be removed in 4.0
146227 */
147228 keep_in_host ?: boolean ;
148229 /**
0 commit comments