jupyter-lsp
diff --git a/‎CHANGELOG.md‎
Lines changed: 2 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎atest/05_Features/Completion.robot‎
Lines changed: 22 additions & 0 deletions b/‎atest/05_Features/Completion.robot‎
Lines changed: 22 additions & 0 deletions
diff --git a/‎atest/examples/Completion.ipynb‎
Lines changed: 50 additions & 0 deletions b/‎atest/examples/Completion.ipynb‎
Lines changed: 50 additions & 0 deletions
diff --git a/‎packages/jupyterlab-lsp/src/extractors/regexp.spec.ts‎
Lines changed: 50 additions & 8 deletions b/‎packages/jupyterlab-lsp/src/extractors/regexp.spec.ts‎
Lines changed: 50 additions & 8 deletions
diff --git a/‎packages/jupyterlab-lsp/src/extractors/regexp.ts‎
Lines changed: 92 additions & 11 deletions b/‎packages/jupyterlab-lsp/src/extractors/regexp.ts‎
Lines changed: 92 additions & 11 deletions
diff --git a/‎packages/jupyterlab-lsp/src/extractors/testutils.ts‎
Lines changed: 6 additions & 1 deletion b/‎packages/jupyterlab-lsp/src/extractors/testutils.ts‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎packages/jupyterlab-lsp/src/transclusions/ipython-bigquery/extractors.ts‎
Lines changed: 2 additions & 2 deletions b/‎packages/jupyterlab-lsp/src/transclusions/ipython-bigquery/extractors.ts‎
Lines changed: 2 additions & 2 deletions
@@ -10,10 +10,12 @@
 
   - prevents throwing a highlights error when adding new cell with <kbd>Shift</kbd> + <kbd>Enter</kbd> ([#544])
   - fixes IPython `pinfo` and `pinfo2` (`?` and `??`) for identifiers containing `s` ([#547])
+  - fixes incorrect behaviour of LSP features in some IPython magics with single line of content ([#560])
 
 [#544]: https://github.com/krassowski/jupyterlab-lsp/pull/544
 [#547]: https://github.com/krassowski/jupyterlab-lsp/pull/547
 [#553]: https://github.com/krassowski/jupyterlab-lsp/pull/553
+[#560]: https://github.com/krassowski/jupyterlab-lsp/pull/560
 
 ### `jupyter-lsp 1.1.4` (2020-02-21)
 
 
@@ -296,6 +296,28 @@ Shows Documentation With CompletionItem Resolve
     Completer Should Include Documentation    the default method of the
     [Teardown]    Clean Up After Working With File    completion.R
 
+Shows Only Relevant Suggestions In Known Magics
+    # https://github.com/krassowski/jupyterlab-lsp/issues/559
+    # h<tab>
+    Enter Cell Editor    20    line=2
+    Trigger Completer
+    Completer Should Suggest    help
+    Completer Should Not Suggest    from
+    Completer Should Suggest    hash
+
+Completes In R Magics
+    # Proper completion in R magics needs to be tested as:
+    # - R magic extractor uses a tailor-made replacer function, not tested elsewhere
+    # - R language server is very sensitive to off-by-one errors (see https://github.com/REditorSupport/languageserver/issues/395)
+    # '%%R\n librar<tab>'
+    Enter Cell Editor    22    line=2
+    Trigger Completer
+    Completer Should Suggest    library
+    # '%R lib<tab>'
+    Enter Cell Editor    24    line=1
+    Trigger Completer
+    Completer Should Suggest    library
+
 *** Keywords ***
 Setup Completion Test
     Setup Notebook    Python    Completion.ipynb
 
@@ -150,6 +150,56 @@
    "source": [
     "t"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Cell magics show only relevant suggestions (triggering after `h` should return `hash`, `help`, etc, but not `from`):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%python\n",
+    "h"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Test that the leading space does not cause issues in R cell magic:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%R\n",
+    " librar"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "And that R line magic works too:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%R li"
+   ]
   }
  ],
  "metadata": {
 
@@ -1,10 +1,13 @@
 import { expect } from 'chai';
-import { RegExpForeignCodeExtractor } from './regexp';
+import { getIndexOfCaptureGroup, RegExpForeignCodeExtractor } from './regexp';
 
 let R_CELL_MAGIC_EXISTS = `%%R
 some text
 `;
 
+let PYTHON_CELL_MAGIC_WITH_H = `%%python
+h`;
+
 let NO_CELL_MAGIC = `%R
 some text
 %%R
@@ -23,11 +26,23 @@ x = """<a href="#">
 </a>""";
 print(x)`;
 
+describe('getIndexOfCaptureGroup', () => {
+  it('extracts index of a captured group', () => {
+    // tests for https://github.com/krassowski/jupyterlab-lsp/issues/559
+    // The searched value ('h') also occurs inside the magic name ('python');
+    // the expected offset (9) is the position of the final 'h', after the newline.
+    const python_cell_magic = new RegExp(
+      '^%%(python|python2|python3|pypy)( .*?)?\n([^]*)'
+    );
+    const offset = getIndexOfCaptureGroup(
+      python_cell_magic,
+      '%%python\nh',
+      'h'
+    );
+
+    expect(offset).to.equal(9);
+  });
+});
+
 describe('RegExpForeignCodeExtractor', () => {
   let r_cell_extractor = new RegExpForeignCodeExtractor({
     language: 'R',
     pattern: '^%%R( .*?)?\n([^]*)',
-    extract_to_foreign: '$2',
+    foreign_capture_groups: [2],
     keep_in_host: true,
     is_standalone: false,
     file_extension: 'R'
@@ -36,12 +51,21 @@ describe('RegExpForeignCodeExtractor', () => {
   let r_line_extractor = new RegExpForeignCodeExtractor({
     language: 'R',
     pattern: '(^|\n)%R (.*)\n?',
-    extract_to_foreign: '$2',
+    foreign_capture_groups: [2],
     keep_in_host: true,
     is_standalone: false,
     file_extension: 'R'
   });
 
+  let python_cell_extractor = new RegExpForeignCodeExtractor({
+    language: 'python',
+    pattern: '^%%(python|python2|python3|pypy)( .*?)?\n([^]*)',
+    foreign_capture_groups: [3],
+    keep_in_host: true,
+    is_standalone: true,
+    file_extension: 'py'
+  });
+
   describe('#has_foreign_code()', () => {
     it('detects cell magics', () => {
       let result = r_cell_extractor.has_foreign_code(R_CELL_MAGIC_EXISTS);
@@ -75,16 +99,34 @@ describe('RegExpForeignCodeExtractor', () => {
   });
 
   describe('#extract_foreign_code()', () => {
+    it('should correctly return the range', () => {
+      let results = python_cell_extractor.extract_foreign_code(
+        PYTHON_CELL_MAGIC_WITH_H
+      );
+      expect(results.length).to.equal(1);
+
+      let result = results[0];
+
+      // test against https://github.com/krassowski/jupyterlab-lsp/issues/559
+      expect(result.host_code).to.equal(PYTHON_CELL_MAGIC_WITH_H);
+      expect(result.foreign_code).to.equal('h');
+
+      expect(result.range.start.line).to.equal(1);
+      expect(result.range.start.column).to.equal(0);
+      expect(result.range.end.line).to.equal(1);
+      expect(result.range.end.column).to.equal(1);
+    });
+
     it('should work with non-line magic and non-cell magic code snippets as well', () => {
       // Note: in the real application, one should NOT use regular expressions for HTML extraction
 
       let html_extractor = new RegExpForeignCodeExtractor({
         language: 'HTML',
-        pattern: '<(.*?)( .*?)?>([^]*?)</\\1>',
-        extract_to_foreign: '<$1$2>$3</$1>',
+        pattern: '(<(.*?)( .*?)?>([^]*?)</\\2>)',
+        foreign_capture_groups: [1],
         keep_in_host: false,
         is_standalone: false,
-        file_extension: 'R'
+        file_extension: 'html'
       });
 
       let results = html_extractor.extract_foreign_code(HTML_IN_PYTHON);
@@ -118,7 +160,7 @@ describe('RegExpForeignCodeExtractor', () => {
       let extractor = new RegExpForeignCodeExtractor({
         language: 'R',
         pattern: '^%%R( .*?)?\n([^]*)',
-        extract_to_foreign: '$2',
+        foreign_capture_groups: [2],
         keep_in_host: false,
         is_standalone: false,
         file_extension: 'R'
@@ -136,7 +178,7 @@ describe('RegExpForeignCodeExtractor', () => {
       let r_line_extractor = new RegExpForeignCodeExtractor({
         language: 'R',
         pattern: '(^|\n)%R (.*)\n?',
-        extract_to_foreign: '$2',
+        foreign_capture_groups: [2],
         keep_in_host: false,
         is_standalone: false,
         file_extension: 'R'
 
@@ -3,6 +3,40 @@ import { position_at_offset } from '../positioning';
 import { replacer } from '../overrides/tokens';
 import { CodeEditor } from '@jupyterlab/codeeditor';
 
+/**
+ * Locate the offset, relative to the start of the full match, at which a capture group begins.
+ *
+ * JavaScript regular expressions report the index of the full match only, so the position
+ * of a group is reconstructed here: capture groups are visited in order and the text of
+ * each group preceding the requested one is consumed from the front of the full match.
+ * Consuming the preceding groups prevents a false hit when the requested value also
+ * occurs earlier in the match (e.g. the `h` in `%%python\nh`).
+ *
+ * This is a best-effort heuristic: groups are identified by value, so if an earlier group
+ * captured exactly the same text as the requested one, the earlier position is returned.
+ *
+ * @param expression - the regular expression to execute against `matched_string`
+ * @param matched_string - the text to search; usually the full match of a previous execution
+ * @param value_of_captured_group - the captured text of the group to locate
+ * @returns the offset of the group within the full match; `0` if `expression` does not
+ *   match `matched_string` at all; the offset just after the last consumed group if no
+ *   group has the requested value
+ */
+export function getIndexOfCaptureGroup(
+  expression: RegExp,
+  matched_string: string,
+  value_of_captured_group: string
+): number {
+  // TODO: use https://github.com/tc39/proposal-regexp-match-indices once supported in >95% of browsers
+  //  (probably around 2025)
+
+  const captured_groups = expression.exec(matched_string);
+
+  if (captured_groups === null) {
+    // nothing matched, hence there is no group to locate; fall back to the start
+    // instead of throwing a TypeError on the property access below
+    return 0;
+  }
+
+  // first element is the full match, the remaining ones are the capture groups
+  const full_match = captured_groups[0];
+  // the part of the full match which was not yet consumed by the preceding groups
+  let remaining = full_match;
+  let offset_in_match = 0;
+
+  for (const group of captured_groups.slice(1)) {
+    // optional groups which did not participate in the match
+    if (typeof group === 'undefined') {
+      continue;
+    }
+
+    const index_in_remaining = remaining.indexOf(group);
+
+    if (group === value_of_captured_group) {
+      if (index_in_remaining !== -1) {
+        return offset_in_match + index_in_remaining;
+      }
+      // the group is nested in a group which was already consumed;
+      // fall back to its first occurrence in the full match
+      return Math.max(full_match.indexOf(group), 0);
+    }
+
+    if (index_in_remaining === -1) {
+      // nested in an already consumed group (or captured outside of the match):
+      // skip it, as adding -1 would corrupt the running offset
+      continue;
+    }
+
+    const group_end_offset = index_in_remaining + group.length;
+
+    remaining = remaining.slice(group_end_offset);
+    offset_in_match += group_end_offset;
+  }
+
+  return offset_in_match;
+}
+
 export class RegExpForeignCodeExtractor implements IForeignCodeExtractor {
   options: RegExpForeignCodeExtractor.IOptions;
   language: string;
@@ -37,14 +71,28 @@ export class RegExpForeignCodeExtractor implements IForeignCodeExtractor {
     let match: RegExpExecArray = this.global_expression.exec(code);
     let host_code_fragment: string;
 
+    let chosen_replacer: string | replacer;
+    let is_new_api_replacer: boolean = false;
+
+    if (typeof this.options.foreign_replacer !== 'undefined') {
+      chosen_replacer = this.options.foreign_replacer;
+      is_new_api_replacer = true;
+    } else if (typeof this.options.foreign_capture_groups !== 'undefined') {
+      chosen_replacer = '$' + this.options.foreign_capture_groups.join('$');
+      is_new_api_replacer = true;
+    } else {
+      chosen_replacer = this.options.extract_to_foreign;
+    }
+
     while (match != null) {
       let matched_string = match[0];
       let position_shift: CodeEditor.IPosition = null;
+
       let foreign_code_fragment = matched_string.replace(
         this.expression,
         // eslint-disable-next-line @typescript-eslint/ban-ts-comment
         // @ts-ignore
-        this.options.extract_to_foreign
+        chosen_replacer
       );
       let prefix = '';
       if (typeof this.options.extract_arguments !== 'undefined') {
@@ -72,11 +120,23 @@ export class RegExpForeignCodeExtractor implements IForeignCodeExtractor {
         }
       }
 
-      // TODO: this could be slightly optimized (start at start) by using the match[n],
-      //  where n is the group to be used; while this reduces the flexibility of extract_to_foreign,
-      //  it might be better to enforce such strict requirement
-      let start_offset =
-        match.index + matched_string.indexOf(foreign_code_fragment);
+      let foreign_code_group_value = foreign_code_fragment;
+
+      if (is_new_api_replacer) {
+        foreign_code_group_value = matched_string.replace(
+          this.expression,
+          '$' + Math.min(...this.options.foreign_capture_groups)
+        );
+      }
+
+      const foreign_group_index_in_match = getIndexOfCaptureGroup(
+        this.expression,
+        matched_string,
+        foreign_code_group_value
+      );
+
+      let start_offset = match.index + foreign_group_index_in_match;
+
       let start = position_at_offset(start_offset, lines);
       let end = position_at_offset(
         start_offset + foreign_code_fragment.length,
@@ -118,16 +178,35 @@ namespace RegExpForeignCodeExtractor {
      * String giving regular expression to test cells for the foreign language presence.
      *
      * For example:
-     *   - %%R( (.*))?\n(.*) will match R cells of rpy2
-     *   - (.*)'<html>(.*)</html>'(.*) will match html documents in strings of any language using single ticks
+     *   - `%%R( (.*))?\n(.*)` will match R cells of rpy2
+     *   - `(.*)'<html>(.*)</html>'(.*)` will match html documents in strings of any language using single ticks
      */
     pattern: string;
     /**
-     * String specifying match groups to be extracted from the regular expression match,
+     * Array of numbers specifying match groups to be extracted from the regular expression match,
      * for the use in virtual document of the foreign language.
-     * For the R example this should be '$3'
+     * For the R example this should be `[3]`. Please note that these are 1-based, as the 0th index is the full match.
+     * If multiple groups are given, those will be concatenated.
+     *
+     * If additional code is needed in between the groups, use `foreign_replacer` in addition to
+     * `foreign_capture_groups` (but not instead!).
+     *
+     * `foreign_capture_groups` is required for proper offset calculation and will no longer be optional in 4.0.
+     */
+    foreign_capture_groups?: number[];
+    /**
+     * Function to compose the foreign document code, in case using a capture group alone is not sufficient.
+     * If specified, `foreign_capture_groups` should be specified as well, so that it points to the first occurrence
+     * of the foreign code. When both are specified, `foreign_replacer` takes precedence.
+     *
+     * See:
+     * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace#specifying_a_function_as_a_parameter
      */
-    extract_to_foreign: string | replacer;
+    foreign_replacer?: replacer;
+    /**
+     * @deprecated `extract_to_foreign` will be removed in 4.0; use `foreign_capture_groups` or `foreign_replacer` instead
+     */
+    extract_to_foreign?: string | replacer;
     /**
      * If arguments from the cell or line magic are to be extracted and prepended before the extracted code,
      * set extract_arguments to a replacer function taking the code and returning the string to be prepended.
@@ -143,6 +222,8 @@ namespace RegExpForeignCodeExtractor {
      *
      * Setting to false is DEPRECATED as it breaks the edit feature (while it could be fixed,
      * it would make the code considerably more complex).
+     *
+     * @deprecated `keep_in_host` will be removed in 4.0
      */
     keep_in_host?: boolean;
     /**
 
@@ -12,9 +12,14 @@ export function extract_code(document: VirtualDocument, code: string) {
   );
 }
 
+/**
+ * Return shape of `get_the_only_pair`: a range together with a virtual document.
+ */
+interface IDocumentWithRange {
+  // presumably the single key of the map given to `get_the_only_pair` - TODO confirm against its return statement
+  range: CodeEditor.IRange;
+  // presumably the virtual document of the block stored under that key - TODO confirm
+  virtual_document: VirtualDocument;
+}
+
 export function get_the_only_pair(
   foreign_document_map: Map<CodeEditor.IRange, IVirtualDocumentBlock>
-) {
+): IDocumentWithRange {
   expect(foreign_document_map.size).to.equal(1);
 
   let range = foreign_document_map.keys().next().value;
 
@@ -26,8 +26,8 @@ export let foreign_code_extractors: IForeignCodeExtractorsRegistry = {
   python: [
     new RegExpForeignCodeExtractor({
       language: 'sql',
-      pattern: `^%%bigquery(?: (?:${SQL_URL_PATTERN}|${COMMAND_PATTERN}|(?:\\w+ << )|(?:\\w+@\\w+)))?\n?(.+\n)?([^]*)`,
-      extract_to_foreign: '$1$2',
+      pattern: `^%%bigquery(?: (?:${SQL_URL_PATTERN}|${COMMAND_PATTERN}|(?:\\w+ << )|(?:\\w+@\\w+)))?\n?((?:.+\n)?(?:[^]*))`,
+      foreign_capture_groups: [1],
       is_standalone: true,
       file_extension: 'sql'
     })