Skip to content

Commit 6325e8d

Browse files
authored
Handle other cases in the str.isspace() method (#2586)
* Handle unescaping of `\f` * Handle other cases in the `str.isspace()` method * Add tests
1 parent 90c1094 commit 6325e8d

File tree

3 files changed

+70
-6
lines changed

3 files changed

+70
-6
lines changed

integration_tests/test_str_attributes.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,42 @@ def is_title():
354354
assert " ".istitle() == False
355355

356356
def is_space():
357+
s0: str = ""
358+
assert s0.isspace() == False
359+
assert "".isspace() == False
360+
361+
s1: str = " \t\n\v\f\r"
362+
assert s1.isspace() == True
363+
assert " \t\n\v\f\r".isspace() == True
364+
365+
s2: str = " \t\n\v\f\rabcd"
366+
assert s2.isspace() == False
367+
assert " \t\n\v\f\rabcd".isspace() == False
368+
369+
s3: str = "abcd \t\n\v\f\ref"
370+
assert s3.isspace() == False
371+
assert "abcd \t\n\v\f\ref".isspace() == False
372+
373+
s4: str = " \\t\n\v\f\r"
374+
assert s4.isspace() == False
375+
assert " \\t\n\v\f\r".isspace() == False
376+
377+
s5: str = " \\t\\n\\v\\f\\r"
378+
assert s5.isspace() == False
379+
assert " \\t\\n\\v\\f\\r".isspace() == False
380+
381+
s6: str = "Hello, LPython!\n"
382+
assert s6.isspace() == False
383+
assert "Hello, LPython!\n".isspace() == False
384+
385+
s7: str = "\t\tHello! \n"
386+
assert s7.isspace() == False
387+
assert "\t\tHello! \n".isspace() == False
388+
389+
s8: str = " \t \n \v \f \r "
390+
assert s8.isspace() == True
391+
assert " \t \n \v \f \r ".isspace() == True
392+
357393
assert "\n".isspace() == True
358394
assert " ".isspace() == True
359395
assert "\r".isspace() == True

src/libasr/string_utils.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,9 @@ char* str_unescape_c(Allocator &al, LCompilers::Str &s) {
192192
} else if (s[idx] == '\\' && s[idx+1] == 'v') {
193193
x += "\v";
194194
idx++;
195+
} else if (s[idx] == '\\' && s[idx + 1] == 'f') {
196+
x += "\f";
197+
idx++;
195198
} else if (s[idx] == '\\' && s[idx+1] == '\\') {
196199
x += "\\";
197200
idx++;

src/runtime/lpython_builtin.py

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -805,8 +805,6 @@ def _lpython_str_istitle(s: str) -> bool:
805805

806806
return True if not only_whitespace else False
807807

808-
809-
810808
@overload
811809
def _lpython_str_find(s: str, sub: str) -> i32:
812810
s_len :i32; sub_len :i32; flag: bool; _len: i32;
@@ -1051,16 +1049,42 @@ def _lpython_str_isascii(s: str) -> bool:
10511049
return False
10521050
return True
10531051

1054-
def _lpython_str_isspace(s:str) -> bool:
1052+
def _lpython_str_isspace(s: str) -> bool:
1053+
# A Unicode character is considered a 'whitespace' if it has a bidirectional
1054+
# type 'WS', 'B' or 'S'; or the category 'Zs'.
10551055
if len(s) == 0:
10561056
return False
1057-
ch: str
1057+
1058+
ch: str
10581059
for ch in s:
1059-
if ch != ' ' and ch != '\t' and ch != '\n' and ch != '\r' and ch != '\f' and ch != '\v':
1060+
if not (ch == " " or # SPACE
1061+
ch == "\n" or # LINE FEED (LF)
1062+
ch == "\r" or # CARRIAGE RETURN (CR)
1063+
ch == "\t" or # CHARACTER TABULATION (HT)
1064+
ch == "\v" or # VERTICAL TAB (VT)
1065+
ch == "\f" or # FORM FEED (FF)
1066+
ch == "\u00A0" or # NO-BREAK SPACE
1067+
ch == "\u1680" or # OGHAM SPACE MARK
1068+
ch == "\u2000" or # EN QUAD
1069+
ch == "\u2001" or # EM QUAD
1070+
ch == "\u2002" or # EN SPACE
1071+
ch == "\u2003" or # EM SPACE
1072+
ch == "\u2004" or # THREE-PER-EM SPACE
1073+
ch == "\u2005" or # FOUR-PER-EM SPACE
1074+
ch == "\u2006" or # SIX-PER-EM SPACE
1075+
ch == "\u2007" or # FIGURE SPACE
1076+
ch == "\u2008" or # PUNCTUATION SPACE
1077+
ch == "\u2009" or # THIN SPACE
1078+
ch == "\u200A" or # HAIR SPACE
1079+
ch == "\u2028" or # LINE SEPARATOR
1080+
ch == "\u2029" or # PARAGRAPH SEPARATOR
1081+
ch == "\u202F" or # NARROW NO-BREAK SPACE
1082+
ch == "\u205F" or # MEDIUM MATHEMATICAL SPACE
1083+
ch == "\u3000" # IDEOGRAPHIC SPACE
1084+
):
10601085
return False
10611086
return True
10621087

1063-
10641088
def list(s: str) -> list[str]:
10651089
l: list[str] = []
10661090
i: i32
@@ -1069,3 +1093,4 @@ def list(s: str) -> list[str]:
10691093
for i in range(len(s)):
10701094
l.append(s[i])
10711095
return l
1096+

0 commit comments

Comments
 (0)