jupyter-lsp
diff --git a/‎atest/05_Features/Completion.robot
Lines changed: 22 additions & 0 deletions b/‎atest/05_Features/Completion.robot
Lines changed: 22 additions & 0 deletions
diff --git a/‎atest/examples/Completion.ipynb
Lines changed: 50 additions & 0 deletions b/‎atest/examples/Completion.ipynb
Lines changed: 50 additions & 0 deletions
diff --git a/‎packages/jupyterlab-lsp/src/extractors/regexp.spec.ts
Lines changed: 52 additions & 8 deletions b/‎packages/jupyterlab-lsp/src/extractors/regexp.spec.ts
Lines changed: 52 additions & 8 deletions
diff --git a/‎packages/jupyterlab-lsp/src/extractors/regexp.ts
Lines changed: 78 additions & 9 deletions b/‎packages/jupyterlab-lsp/src/extractors/regexp.ts
Lines changed: 78 additions & 9 deletions
diff --git a/‎packages/jupyterlab-lsp/src/extractors/testutils.ts
Lines changed: 6 additions & 1 deletion b/‎packages/jupyterlab-lsp/src/extractors/testutils.ts
Lines changed: 6 additions & 1 deletion
@@ -296,6 +296,28 @@ Shows Documentation With CompletionItem Resolve
     Completer Should Include Documentation    the default method of the
     [Teardown]    Clean Up After Working With File    completion.R
 
+Shows Only Relevant Suggestions In Known Magics
+    # https://github.com/krassowski/jupyterlab-lsp/issues/559
+    # h<tab>
+    Enter Cell Editor    20    line=2
+    Trigger Completer
+    Completer Should Suggest    help
+    Completer Should Not Suggest  from
+    Completer Should Suggest    hash
+
+Completes In R Magics
+    # Proper completion in R magics needs to be tested as:
+    # - R magic extractor uses a tailor-made replacer function, not tested elsewhere
+    # - R language server is very sensitive to off-by-one errors (see https://github.com/REditorSupport/languageserver/issues/395)
+    # '%%R\n librar<tab>'
+    Enter Cell Editor    22    line=2
+    Trigger Completer
+    Completer Should Suggest    library
+    # '%R lib<tab>'
+    Enter Cell Editor    24    line=1
+    Trigger Completer
+    Completer Should Suggest    library
+
 *** Keywords ***
 Setup Completion Test
     Setup Notebook    Python    Completion.ipynb
 
@@ -150,6 +150,56 @@
    "source": [
     "t"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Cell magics show only relevant suggestions (triggering after `h` should return `hash`, `help`, etc, but not `from`):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%python\n",
+    "h"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Test that the leading space does not cause issues in R cell magic:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%R\n",
+    " librar"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "And that R line magic works too:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%R li"
+   ]
   }
  ],
  "metadata": {
 
@@ -1,10 +1,13 @@
 import { expect } from 'chai';
-import { RegExpForeignCodeExtractor } from './regexp';
+import { getIndexOfCaptureGroup, RegExpForeignCodeExtractor } from './regexp';
 
 let R_CELL_MAGIC_EXISTS = `%%R
 some text
 `;
 
+let PYTHON_CELL_MAGIC_WITH_H = `%%python
+h`;
+
 let NO_CELL_MAGIC = `%R
 some text
 %%R
@@ -23,11 +26,24 @@ x = """<a href="#">
 </a>""";
 print(x)`;
 
+
+describe('getIndexOfCaptureGroup', () => {
+  it('extracts index of a captured group', () => {
+    // tests for https://github.com/krassowski/jupyterlab-lsp/issues/559
+    let result = getIndexOfCaptureGroup(
+      new RegExp('^%%(python|python2|python3|pypy)( .*?)?\n([^]*)'),
+      '%%python\nh',
+      'h'
+    );
+    expect(result).to.be.equal(9);
+  });
+});
+
 describe('RegExpForeignCodeExtractor', () => {
   let r_cell_extractor = new RegExpForeignCodeExtractor({
     language: 'R',
     pattern: '^%%R( .*?)?\n([^]*)',
-    extract_to_foreign: '$2',
+    foreign_capture_group: 2,
     keep_in_host: true,
     is_standalone: false,
     file_extension: 'R'
@@ -36,12 +52,21 @@ describe('RegExpForeignCodeExtractor', () => {
   let r_line_extractor = new RegExpForeignCodeExtractor({
     language: 'R',
     pattern: '(^|\n)%R (.*)\n?',
-    extract_to_foreign: '$2',
+    foreign_capture_group: 2,
     keep_in_host: true,
     is_standalone: false,
     file_extension: 'R'
   });
 
+  let python_cell_extractor = new RegExpForeignCodeExtractor({
+    language: 'python',
+    pattern: '^%%(python|python2|python3|pypy)( .*?)?\n([^]*)',
+    foreign_capture_group: 3,
+    keep_in_host: true,
+    is_standalone: true,
+    file_extension: 'py'
+  });
+
   describe('#has_foreign_code()', () => {
     it('detects cell magics', () => {
       let result = r_cell_extractor.has_foreign_code(R_CELL_MAGIC_EXISTS);
@@ -75,16 +100,35 @@ describe('RegExpForeignCodeExtractor', () => {
   });
 
   describe('#extract_foreign_code()', () => {
+
+    it('should correctly return the range', () => {
+      let results = python_cell_extractor.extract_foreign_code(PYTHON_CELL_MAGIC_WITH_H);
+      expect(results.length).to.equal(1);
+
+      let result = results[0];
+
+      // test against https://github.com/krassowski/jupyterlab-lsp/issues/559
+      expect(result.host_code).to.equal(PYTHON_CELL_MAGIC_WITH_H);
+      expect(result.foreign_code).to.equal(
+        'h'
+      );
+
+      expect(result.range.start.line).to.equal(1);
+      expect(result.range.start.column).to.equal(0);
+      expect(result.range.end.line).to.equal(1);
+      expect(result.range.end.column).to.equal(1);
+    });
+
     it('should work with non-line magic and non-cell magic code snippets as well', () => {
       // Note: in the real application, one should NOT use regular expressions for HTML extraction
 
       let html_extractor = new RegExpForeignCodeExtractor({
         language: 'HTML',
-        pattern: '<(.*?)( .*?)?>([^]*?)</\\1>',
-        extract_to_foreign: '<$1$2>$3</$1>',
+        pattern: '(<(.*?)( .*?)?>([^]*?)</\\2>)',
+        foreign_capture_group: 1,
         keep_in_host: false,
         is_standalone: false,
-        file_extension: 'R'
+        file_extension: 'html'
       });
 
       let results = html_extractor.extract_foreign_code(HTML_IN_PYTHON);
@@ -118,7 +162,7 @@ describe('RegExpForeignCodeExtractor', () => {
       let extractor = new RegExpForeignCodeExtractor({
         language: 'R',
         pattern: '^%%R( .*?)?\n([^]*)',
-        extract_to_foreign: '$2',
+        foreign_capture_group: 2,
         keep_in_host: false,
         is_standalone: false,
         file_extension: 'R'
@@ -136,7 +180,7 @@ describe('RegExpForeignCodeExtractor', () => {
       let r_line_extractor = new RegExpForeignCodeExtractor({
         language: 'R',
         pattern: '(^|\n)%R (.*)\n?',
-        extract_to_foreign: '$2',
+        foreign_capture_group: 2,
         keep_in_host: false,
         is_standalone: false,
         file_extension: 'R'
 
@@ -3,6 +3,38 @@ import { position_at_offset } from '../positioning';
 import { replacer } from '../overrides/tokens';
 import { CodeEditor } from '@jupyterlab/codeeditor';
 
+
+/**
+ * Compute the offset, within the text matched by `expression`, at which the
+ * capture group holding `value_of_captured_group` starts.
+ *
+ * Capture groups are scanned in order; every preceding group that can be located
+ * moves the search position past its end, so the returned offset points at the
+ * first group whose value is exactly `value_of_captured_group`.
+ *
+ * @param expression - regular expression (with capture groups) executed against `matched_string`
+ * @param matched_string - the text to execute the expression against
+ * @param value_of_captured_group - exact value of the capture group to locate
+ * @returns offset of the group relative to the start of the full match; if the expression
+ *   does not match, or no capture group has the requested value, the position of the first
+ *   occurrence of the value (or 0 when it does not occur at all) is returned instead
+ */
+export function getIndexOfCaptureGroup(expression: RegExp, matched_string: string, value_of_captured_group: string): number {
+  // TODO: use https://github.com/tc39/proposal-regexp-match-indices once supported in >95% of browsers
+  //  (probably around 2025)
+
+  // best-effort answer used when the group cannot be pinpointed: first occurrence of the value, or 0
+  const fallback_offset = (text: string): number =>
+    Math.max(text.indexOf(value_of_captured_group), 0);
+
+  // get index of the part that is being extracted to foreign document
+  const captured_groups = expression.exec(matched_string);
+
+  // exec() returns null when the expression does not match; do not dereference it
+  if (captured_groups === null) {
+    return fallback_offset(matched_string);
+  }
+
+  // first element is full match
+  const full_match = captured_groups[0];
+  // part of the full match which was not yet consumed by the preceding groups
+  let remaining = full_match;
+  let offset_in_match = 0;
+
+  for (const group of captured_groups.slice(1)) {
+    // optional groups which did not participate in the match are undefined
+    if (typeof group === 'undefined') {
+      continue;
+    }
+
+    const group_start = remaining.indexOf(group);
+
+    // the group cannot be located in the remaining text (e.g. it is nested in
+    // a group which was already consumed); it must not shift the offset
+    if (group_start === -1) {
+      continue;
+    }
+
+    if (group === value_of_captured_group) {
+      return offset_in_match + group_start;
+    }
+
+    const group_end = group_start + group.length;
+
+    remaining = remaining.slice(group_end);
+    offset_in_match += group_end;
+  }
+
+  // no capture group carries the requested value
+  return fallback_offset(full_match);
+}
+
 export class RegExpForeignCodeExtractor implements IForeignCodeExtractor {
   options: RegExpForeignCodeExtractor.IOptions;
   language: string;
@@ -37,14 +69,18 @@ export class RegExpForeignCodeExtractor implements IForeignCodeExtractor {
     let match: RegExpExecArray = this.global_expression.exec(code);
     let host_code_fragment: string;
 
+    let new_api_replacer = typeof this.options.foreign_replacer !== 'undefined' ? this.options.foreign_replacer : ('$' + this.options.foreign_capture_group);
+    const replacer = typeof this.options.extract_to_foreign !== 'undefined' ? this.options.extract_to_foreign : new_api_replacer;
+
     while (match != null) {
       let matched_string = match[0];
       let position_shift: CodeEditor.IPosition = null;
+
       let foreign_code_fragment = matched_string.replace(
         this.expression,
         // eslint-disable-next-line @typescript-eslint/ban-ts-comment
         // @ts-ignore
-        this.options.extract_to_foreign
+        replacer
       );
       let prefix = '';
       if (typeof this.options.extract_arguments !== 'undefined') {
@@ -72,11 +108,22 @@ export class RegExpForeignCodeExtractor implements IForeignCodeExtractor {
         }
       }
 
-      // TODO: this could be slightly optimized (start at start) by using the match[n],
-      //  where n is the group to be used; while this reduces the flexibility of extract_to_foreign,
-      //  it might be better to enforce such strict requirement
+      let foreign_code_group_value = foreign_code_fragment;
+
+      if (new_api_replacer) {
+        foreign_code_group_value = matched_string.replace(
+          this.expression,
+          '$' + this.options.foreign_capture_group
+        );
+      }
+
+      const foreign_group_index_in_match = getIndexOfCaptureGroup(
+          this.expression, matched_string, foreign_code_group_value
+      );
+
       let start_offset =
-        match.index + matched_string.indexOf(foreign_code_fragment);
+        match.index + foreign_group_index_in_match;
+
       let start = position_at_offset(start_offset, lines);
       let end = position_at_offset(
         start_offset + foreign_code_fragment.length,
@@ -118,16 +165,36 @@ namespace RegExpForeignCodeExtractor {
      * String giving regular expression to test cells for the foreign language presence.
      *
      * For example:
-     *   - %%R( (.*))?\n(.*) will match R cells of rpy2
-     *   - (.*)'<html>(.*)</html>'(.*) will match html documents in strings of any language using single ticks
+     *   - `%%R( (.*))?\n(.*)` will match R cells of rpy2
+     *   - `(.*)'<html>(.*)</html>'(.*)` will match html documents in strings of any language using single ticks
      */
     pattern: string;
     /**
      * String specifying match groups to be extracted from the regular expression match,
      * for the use in virtual document of the foreign language.
-     * For the R example this should be '$3'
+     * For the R example this should be `3`. Please note that these are 1-based, as the 0th index is the full match.
+     *
+     * If more than one capture group is needed to extract the code (which is rarely the case:
+     * usually one can use non-capturing groups rather than multiple adjacent capturing groups),
+     * specify the first capturing group to allow for proper calculation of the start offset,
+     * and handle any additional groups using `foreign_replacer`.
+     *
+     * `foreign_capture_group` is required for proper offset calculation and will no longer be optional in 4.0.
+     */
+    foreign_capture_group?: number;
+    /**
+     * Function to compose the foreign document code, in case using a capture group alone is not sufficient;
+     * If specified, `foreign_capture_group` should be specified as well, so that it points to the first occurrence
+     * of the foreign code. When both are specified, `foreign_replacer` takes precedence.
+     *
+     * See:
+     * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace#specifying_a_function_as_a_parameter
+     */
+    foreign_replacer?: replacer;
+    /**
+     * @deprecated `extract_to_foreign` will be removed in 4.0; use `foreign_capture_group` or `foreign_replacer` instead
      */
-    extract_to_foreign: string | replacer;
+    extract_to_foreign?: string | replacer;
     /**
      * If arguments from the cell or line magic are to be extracted and prepended before the extracted code,
      * set extract_arguments to a replacer function taking the code and returning the string to be prepended.
@@ -143,6 +210,8 @@ namespace RegExpForeignCodeExtractor {
      *
      * Setting to false is DEPRECATED as it breaks the edit feature (while it could be fixed,
      * it would make the code considerably more complex).
+     *
+     * @deprecated `keep_in_host` will be removed in 4.0
      */
     keep_in_host?: boolean;
     /**
 
@@ -12,9 +12,14 @@ export function extract_code(document: VirtualDocument, code: string) {
   );
 }
 
+/**
+ * An editor range paired with a virtual document;
+ * this is the declared return type of `get_the_only_pair`.
+ */
+interface IDocumentWithRange {
+  /** Editor range; `get_the_only_pair` reads it from the first key of the foreign document map. */
+  range: CodeEditor.IRange;
+  /** NOTE(review): presumably the virtual document of the block stored under `range` - confirm. */
+  virtual_document: VirtualDocument;
+}
+
 export function get_the_only_pair(
   foreign_document_map: Map<CodeEditor.IRange, IVirtualDocumentBlock>
-) {
+): IDocumentWithRange {
   expect(foreign_document_map.size).to.equal(1);
 
   let range = foreign_document_map.keys().next().value;