Skip to content

Commit 51c4238

Browse files
bpo-45692: Improve support of non-ASCII identifiers in IDLE
1 parent 456e27a commit 51c4238

File tree

6 files changed

+27
-45
lines changed

6 files changed

+27
-45
lines changed

Lib/idlelib/autocomplete.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,8 @@
2828
TRY_A = False, False, False, ATTRS # '.' for attributes.
2929
TRY_F = False, False, False, FILES # '/' in quotes for file name.
3030

31-
# This string includes all chars that may be in an identifier.
32-
# TODO Update this here and elsewhere.
33-
ID_CHARS = string.ascii_letters + string.digits + "_"
31+
# all ASCII chars that may be in an identifier
32+
_ASCII_ID_CHARS = frozenset(string.ascii_letters + string.digits + "_")
3433

3534
SEPS = f"{os.sep}{os.altsep if os.altsep else ''}"
3635
TRIGGERS = f".{SEPS}"
@@ -134,7 +133,11 @@ def open_completions(self, args):
134133
elif hp.is_in_code() and (not mode or mode==ATTRS):
135134
self._remove_autocomplete_window()
136135
mode = ATTRS
137-
while i and (curline[i-1] in ID_CHARS or ord(curline[i-1]) > 127):
136+
while i:
137+
c = curline[i-1]
138+
if c not in _ASCII_ID_CHARS:
139+
if c <= '\x7f' or not ('a' + c).isidentifier():
140+
break
138141
i -= 1
139142
comp_start = curline[i:j]
140143
if i and curline[i-1] == '.': # Need object with attributes.

Lib/idlelib/autoexpand.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,10 @@
1313
There is only one instance of Autoexpand.
1414
'''
1515
import re
16-
import string
1716

17+
_LAST_WORD_RE = re.compile(r'\b\w+\Z')
1818

1919
class AutoExpand:
20-
wordchars = string.ascii_letters + string.digits + "_"
21-
2220
def __init__(self, editwin):
2321
self.text = editwin.text
2422
self.bell = self.text.bell
@@ -85,10 +83,8 @@ def getwords(self):
8583
def getprevword(self):
8684
"Return the word prefix before the cursor."
8785
line = self.text.get("insert linestart", "insert")
88-
i = len(line)
89-
while i > 0 and line[i-1] in self.wordchars:
90-
i = i-1
91-
return line[i:]
86+
m = _LAST_WORD_RE.search(line)
87+
return m[0] if m else ''
9288

9389

9490
if __name__ == '__main__':

Lib/idlelib/editor.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import os
44
import platform
55
import re
6-
import string
76
import sys
87
import tokenize
98
import traceback
@@ -806,14 +805,12 @@ def ResetColorizer(self):
806805
if self.line_numbers is not None:
807806
self.line_numbers.update_colors()
808807

809-
IDENTCHARS = string.ascii_letters + string.digits + "_"
810-
811808
def colorize_syntax_error(self, text, pos):
812809
text.tag_add("ERROR", pos)
813810
char = text.get(pos)
814-
if char and char in self.IDENTCHARS:
811+
if char and ('a' + char).isidentifier():
815812
text.tag_add("ERROR", pos + " wordstart", pos)
816-
if '\n' == text.get(pos): # error at line end
813+
if char == '\n': # error at line end
817814
text.mark_set("insert", pos)
818815
else:
819816
text.mark_set("insert", pos + "+1c")

Lib/idlelib/hyperparser.py

Lines changed: 11 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,6 @@
1414
# all ASCII chars that may be the first char of an identifier
1515
_ASCII_ID_FIRST_CHARS = frozenset(string.ascii_letters + "_")
1616

17-
# lookup table for whether 7-bit ASCII chars are valid in a Python identifier
18-
_IS_ASCII_ID_CHAR = [(chr(x) in _ASCII_ID_CHARS) for x in range(128)]
19-
# lookup table for whether 7-bit ASCII chars are valid as the first
20-
# char in a Python identifier
21-
_IS_ASCII_ID_FIRST_CHAR = \
22-
[(chr(x) in _ASCII_ID_FIRST_CHARS) for x in range(128)]
23-
2417

2518
class HyperParser:
2619
def __init__(self, editwin, index):
@@ -166,53 +159,47 @@ def _eat_identifier(cls, str, limit, pos):
166159
167160
Keywords are not considered identifiers here, except for True, False and None.
168161
"""
169-
is_ascii_id_char = _IS_ASCII_ID_CHAR
170-
171162
# Start at the end (pos) and work backwards.
172163
i = pos
173164

174165
# Go backwards as long as the characters are valid ASCII
175166
# identifier characters. This is an optimization, since it
176167
# is faster in the common case where most of the characters
177168
# are ASCII.
178-
while i > limit and (
179-
ord(str[i - 1]) < 128 and
180-
is_ascii_id_char[ord(str[i - 1])]
181-
):
169+
while i > limit and str[i - 1] in _ASCII_ID_CHARS:
182170
i -= 1
183171

184172
# If the above loop ended due to reaching a non-ASCII
185173
# character, continue going backwards using the most generic
186174
# test for whether a string contains only valid identifier
187175
# characters.
188-
if i > limit and ord(str[i - 1]) >= 128:
189-
while i - 4 >= limit and ('a' + str[i - 4:pos]).isidentifier():
176+
if i > limit and str[i - 1] > '\x7f':
177+
while i - 4 >= limit and ('a' + str[i - 4:i]).isidentifier():
190178
i -= 4
191-
if i - 2 >= limit and ('a' + str[i - 2:pos]).isidentifier():
179+
if i - 2 >= limit and ('a' + str[i - 2:i]).isidentifier():
192180
i -= 2
193-
if i - 1 >= limit and ('a' + str[i - 1:pos]).isidentifier():
181+
if i - 1 >= limit and ('a' + str[i - 1]).isidentifier():
194182
i -= 1
195183

196184
# The identifier candidate starts here. If it isn't a valid
197185
# identifier, don't eat anything. At this point that is only
198186
# possible if the first character isn't a valid first
199187
# character for an identifier.
200-
if not str[i:pos].isidentifier():
188+
if i < pos and not str[i].isidentifier():
201189
return 0
202190
elif i < pos:
203191
# All characters in str[i:pos] are valid ASCII identifier
204192
# characters, so it is enough to check that the first is
205193
# valid as the first character of an identifier.
206-
if not _IS_ASCII_ID_FIRST_CHAR[ord(str[i])]:
194+
if str[i] not in _ASCII_ID_FIRST_CHARS:
207195
return 0
208196

209197
# All keywords are valid identifiers, but should not be
210198
# considered identifiers here, except for True, False and None.
211-
if i < pos and (
212-
iskeyword(str[i:pos]) and
213-
str[i:pos] not in cls._ID_KEYWORDS
214-
):
215-
return 0
199+
if i < pos:
200+
word = str[i:pos]
201+
if iskeyword(word) and word not in cls._ID_KEYWORDS:
202+
return 0
216203

217204
return pos - i
218205

Lib/idlelib/undo.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
import string
2-
31
from idlelib.delegator import Delegator
42

53
# tkinter import not needed because module does not create widgets,
@@ -251,10 +249,8 @@ def merge(self, cmd):
251249
self.chars = self.chars + cmd.chars
252250
return True
253251

254-
alphanumeric = string.ascii_letters + string.digits + "_"
255-
256252
def classify(self, c):
257-
if c in self.alphanumeric:
253+
if ('a' + c).isidentifier():
258254
return "alphanumeric"
259255
if c == "\n":
260256
return "newline"
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Improve support of non-ASCII identifiers in IDLE
2+
(autoexpanding, autocompletion, undo, etc).
3+

0 commit comments

Comments
 (0)