import ast
import io
import operator
import os
import sys
import token
import tokenize


class Visitor(ast.NodeVisitor):
    def __init__(self, lines):
        self._lines = lines
        self.line_numbers_with_nodes = set()
        self.line_numbers_with_statements = []

    def generic_visit(self, node):
        # A node starting at column 0 is module-level code; record its line,
        # and additionally track it if it is a statement.
        if hasattr(node, 'col_offset') and hasattr(node, 'lineno') and node.col_offset == 0:
            self.line_numbers_with_nodes.add(node.lineno)
            if isinstance(node, ast.stmt):
                self.line_numbers_with_statements.append(node.lineno)

        ast.NodeVisitor.generic_visit(self, node)
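
# Illustrative sketch (not part of the original module): for the source
# 'x = 1\nif x:\n    y = 2\n', visiting the parsed tree records top-level
# statement lines [1, 2]; the nested assignment on line 3 starts at a
# non-zero column and is therefore skipped.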

def _tokenize(source):
    """Tokenize Python source code."""
    # Using an undocumented API, as the documented one in Python 2.7 does not
    # work as needed cross-version.
    return tokenize.generate_tokens(io.StringIO(source).readline)
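
# A quick sketch of the resulting token stream: blank lines show up as NL
# tokens, which is what normalize_lines() filters on below.
#
#     >>> [token.tok_name[t[0]] for t in _tokenize('a = 1\n\nb = 2\n')]
#     ['NAME', 'OP', 'NUMBER', 'NEWLINE', 'NL', 'NAME', 'OP', 'NUMBER', 'NEWLINE', 'ENDMARKER']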

def _indent_size(line):
    # Return the index of the first non-whitespace character, i.e. the line's
    # indent size; an all-whitespace line implicitly returns None.
    for index, char in enumerate(line):
        if not char.isspace():
            return index
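
# For illustration: _indent_size('    pass') == 4, _indent_size('pass') == 0,
# and _indent_size('   ') is None, so whitespace-only lines never count as
# being at indent level 0.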

def _get_global_statement_blocks(source, lines):
    """Return a list of all global statement blocks.

    The list comprises 3-item tuples containing the starting line number,
    the ending line number, and whether the statement is a single line.

    """
    tree = ast.parse(source)
    visitor = Visitor(lines)
    visitor.visit(tree)

    statement_ranges = []
    for index, line_number in enumerate(visitor.line_numbers_with_statements):
        remaining_line_numbers = visitor.line_numbers_with_statements[index + 1:]
        end_line_number = len(lines) if len(remaining_line_numbers) == 0 else min(remaining_line_numbers) - 1
        current_statement_is_oneline = line_number == end_line_number

        if len(statement_ranges) == 0:
            statement_ranges.append((line_number, end_line_number, current_statement_is_oneline))
            continue

        previous_statement = statement_ranges[-1]
        previous_statement_is_oneline = previous_statement[2]
        if previous_statement_is_oneline and current_statement_is_oneline:
            # Merge runs of consecutive single-line statements into one block.
            statement_ranges[-1] = previous_statement[0], end_line_number, True
        else:
            statement_ranges.append((line_number, end_line_number, current_statement_is_oneline))

    return statement_ranges
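
# Worked example (illustrative): for the four-line source
# 'a = 1\nb = 2\ndef f():\n    pass\n', the top-level statements are on
# lines 1, 2 and 3, and the result is [(1, 2, True), (3, 4, False)]: the two
# single-line assignments merge into one block, while the function definition
# spans lines 3-4 and stays its own multi-line block.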

def normalize_lines(source):
    """Normalize blank lines for sending to the terminal.

    Blank lines within a statement block are removed to prevent the REPL
    from thinking the block is finished. Newlines are added to separate
    top-level statements so that the REPL does not think there is a syntax
    error.

    """
    lines = source.splitlines(False)
    # Find out if we have any trailing blank lines.
    has_blank_lines = len(lines[-1].strip()) == 0 or source.endswith(os.linesep)

    # Step 1: Remove empty lines.
    tokens = _tokenize(source)
    newlines_indexes_to_remove = (spos[0] for (toknum, tokval, spos, epos, line) in tokens
                                  if len(line.strip()) == 0 and token.tok_name[toknum] == 'NL' and spos[0] == epos[0])

    # Delete from the bottom up so earlier indexes stay valid.
    for line_number in reversed(list(newlines_indexes_to_remove)):
        del lines[line_number - 1]

    # Step 2: Add blank lines between each global statement block.
    # Consecutive single-line blocks of code are treated as a single statement
    # so that we do not unnecessarily add too many blank lines.
    source = os.linesep.join(lines)
    tokens = _tokenize(source)
    # Note: this generator is built lazily and is never consumed below, so it
    # currently has no effect.
    dedent_indexes = (spos[0] for (toknum, tokval, spos, epos, line) in tokens
                      if toknum == token.DEDENT and _indent_size(line) == 0)

    global_statement_ranges = _get_global_statement_blocks(source, lines)

    # Insert a blank line before every block that does not start on line 1,
    # walking the blocks bottom-up so the line numbers remain valid.
    for line_number in filter(lambda x: x > 1, map(operator.itemgetter(0), reversed(global_statement_ranges))):
        lines.insert(line_number - 1, '')

    sys.stdout.write(os.linesep.join(lines) + (os.linesep if has_blank_lines else ''))
    sys.stdout.flush()
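
# End-to-end sketch (illustrative input/output, assuming '\n' as os.linesep):
# normalize_lines('def f():\n\n    x = 1\nx = 2') writes
# 'def f():\n    x = 1\n\nx = 2' to stdout: the blank line inside the
# function body is removed, and a blank line is inserted before the
# following top-level statement.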


if __name__ == '__main__':
    contents = sys.argv[1]
    try:
        # Round-trip the argument through the default encoding so that any
        # lone surrogates are replaced rather than failing later on output.
        default_encoding = sys.getdefaultencoding()
        contents = contents.encode(default_encoding, 'surrogateescape').decode(default_encoding, 'replace')
    except (UnicodeError, LookupError):
        pass
    if isinstance(contents, bytes):
        # Under Python 2 the argument arrives as bytes; decode it explicitly.
        contents = contents.decode('utf8')
    normalize_lines(contents)