Skip to content

Commit 2f1ea73

Browse files
committed
_ASCII_CHARS, set operators
1 parent 7f7dca1 commit 2f1ea73

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

Lib/csv.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -84,6 +84,8 @@ class excel:
8484
__version__ = "1.0"
8585

8686

87+
_ASCII_CHARS = frozenset(map(chr, range(127))) # 7-bit ASCII
88+
8789
class Dialect:
8890
"""Describe a CSV dialect.
8991
@@ -368,8 +370,6 @@ def _guess_delimiter(self, data, delimiters):
368370

369371
data = list(filter(None, data.split('\n')))
370372

371-
ascii = {chr(c) for c in range(127)} # 7-bit ASCII
372-
373373
# build frequency tables
374374
chunkLength = min(10, len(data))
375375
iteration = 0
@@ -381,15 +381,15 @@ def _guess_delimiter(self, data, delimiters):
381381
while start < len(data):
382382
iteration += 1
383383
chunk = data[start:end]
384-
candidate_chars = set("".join(chunk))
385-
candidate_chars.intersection_update(ascii)
384+
candidate_chars = set().union(*chunk)
385+
candidate_chars &= _ASCII_CHARS
386386
for line in chunk:
387387
for char in candidate_chars:
388388
count = line.count(char)
389389
charFrequency[char][count] += 1
390390

391391
# must count even if frequency is 0
392-
missing_chars = ascii.difference(candidate_chars)
392+
missing_chars = _ASCII_CHARS - candidate_chars
393393
chunk_len = len(chunk)
394394
for char in missing_chars:
395395
charFrequency[char][0] += chunk_len

0 commit comments

Comments
 (0)