Skip to content

Commit bf59fb4

Browse files
authored
Merge pull request #1762 from Shaikh-Ubaid/asr_unescaped_strings
ASR: Keep strings unescaped in AST, ASR
2 parents e432e7a + ad5a1db commit bf59fb4

14 files changed

+235
-78
lines changed

src/libasr/asdl_cpp.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1615,12 +1615,12 @@ def visitField(self, field, cons):
16151615
elif field.type == "string" and not field.seq:
16161616
if field.opt:
16171617
self.emit("if (x.m_%s) {" % field.name, 2)
1618-
self.emit( 's.append("\\"" + std::string(x.m_%s) + "\\"");' % field.name, 3)
1618+
self.emit( 's.append("\\"" + get_escaped_str(x.m_%s) + "\\"");' % field.name, 3)
16191619
self.emit("} else {", 2)
16201620
self.emit( 's.append("()");', 3)
16211621
self.emit("}", 2)
16221622
else:
1623-
self.emit('s.append("\\"" + std::string(x.m_%s) + "\\"");' % field.name, 2)
1623+
self.emit('s.append("\\"" + get_escaped_str(x.m_%s) + "\\"");' % field.name, 2)
16241624
elif field.type == "int" and not field.seq:
16251625
if field.opt:
16261626
self.emit("if (x.m_%s) {" % field.name, 2)

src/libasr/codegen/asr_to_c_cpp.h

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -935,8 +935,20 @@ R"(#include <stdio.h>
935935
void visit_StringConstant(const ASR::StringConstant_t &x) {
936936
src = "\"";
937937
std::string s = x.m_s;
938-
for (size_t idx=0; idx < s.size(); idx++) {
939-
src += s[idx];
938+
for (size_t idx = 0; idx < s.size(); idx++) {
939+
if (s[idx] == '\n') {
940+
src += "\\n";
941+
} else if (s[idx] == '\t') {
942+
src += "\\t";
943+
} else if (s[idx] == '\r') {
944+
src += "\\r";
945+
}else if (s[idx] == '\\') {
946+
src += "\\\\";
947+
} else if (s[idx] == '\"') {
948+
src += "\\\"";
949+
} else {
950+
src += s[idx];
951+
}
940952
}
941953
src += "\"";
942954
last_expr_precedence = 2;

src/libasr/codegen/asr_to_julia.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1441,8 +1441,16 @@ class ASRToJuliaVisitor : public ASR::BaseVisitor<ASRToJuliaVisitor>
14411441
{
14421442
src = "\"";
14431443
std::string s = x.m_s;
1444-
for (size_t idx=0; idx < s.size(); idx++) {
1445-
src += s[idx];
1444+
for (size_t idx = 0; idx < s.size(); idx++) {
1445+
if (s[idx] == '\n') {
1446+
src += "\\n";
1447+
} else if (s[idx] == '\\') {
1448+
src += "\\\\";
1449+
} else if (s[idx] == '\"') {
1450+
src += "\\\"";
1451+
} else {
1452+
src += s[idx];
1453+
}
14461454
}
14471455
src += "\"";
14481456
last_expr_precedence = julia_prec::Base;

src/libasr/codegen/asr_to_llvm.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6006,8 +6006,7 @@ class ASRToLLVMVisitor : public ASR::BaseVisitor<ASRToLLVMVisitor>
60066006
}
60076007

60086008
void visit_StringConstant(const ASR::StringConstant_t &x) {
6009-
std::string s = unescape_string(al, x.m_s);
6010-
tmp = builder->CreateGlobalStringPtr(s);
6009+
tmp = builder->CreateGlobalStringPtr(x.m_s);
60116010
}
60126011

60136012
inline void fetch_ptr(ASR::Variable_t* x) {

src/libasr/containers.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,10 @@ struct Str {
235235
// Returns a copy of the string as a NULL terminated std::string
236236
std::string str() const { return std::string(p, n); }
237237

238+
char operator[](size_t pos) {
239+
return p[pos];
240+
}
241+
238242
// Initializes Str from std::string by making a copy excluding the null char
239243
void from_str(Allocator &al, const std::string &s) {
240244
n = s.size();

src/libasr/string_utils.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -154,15 +154,20 @@ std::string join_paths(const std::vector<std::string> &paths) {
154154
return p;
155155
}
156156

157-
std::string unescape_string(Allocator &/*al*/, std::string s) {
157+
char* unescape_string(Allocator &al, LCompilers::Str &s) {
158158
std::string x;
159159
for (size_t idx=0; idx < s.size(); idx++) {
160-
if (s[idx] == '\\' && s[idx+1] == 'n') {
160+
if (s[idx] == '\\' && s[idx+1] == '\n') { // continuation character
161+
idx++;
162+
} else if (s[idx] == '\\' && s[idx+1] == 'n') {
161163
x += "\n";
162164
idx++;
163165
} else if (s[idx] == '\\' && s[idx+1] == 't') {
164166
x += "\t";
165167
idx++;
168+
} else if (s[idx] == '\\' && s[idx+1] == 'r') {
169+
x += "\r";
170+
idx++;
166171
} else if (s[idx] == '\\' && s[idx+1] == 'b') {
167172
x += "\b";
168173
idx++;
@@ -175,11 +180,14 @@ std::string unescape_string(Allocator &/*al*/, std::string s) {
175180
} else if (s[idx] == '\\' && s[idx+1] == '"') {
176181
x += '"';
177182
idx++;
183+
} else if (s[idx] == '\\' && s[idx+1] == '\'') {
184+
x += '\'';
185+
idx++;
178186
} else {
179187
x += s[idx];
180188
}
181189
}
182-
return x;
190+
return LCompilers::s2c(al, x);
183191
}
184192

185193
} // namespace LCompilers

src/libasr/string_utils.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <cctype>
77

88
#include <libasr/alloc.h>
9+
#include <libasr/containers.h>
910

1011
namespace LCompilers {
1112

@@ -37,7 +38,7 @@ bool is_relative_path(const std::string &path);
3738
// Joins paths (paths can be empty)
3839
std::string join_paths(const std::vector<std::string> &paths);
3940

40-
std::string unescape_string(Allocator &al, std::string s);
41+
char* unescape_string(Allocator &al, LCompilers::Str &s);
4142

4243
} // namespace LCompilers
4344

src/lpython/parser/semantics.h

Lines changed: 9 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -720,55 +720,6 @@ static inline ast_t* BOOLOP_01(Allocator &al, Location &loc,
720720
#define COMPARE(x, op, y, l) make_Compare_t(p.m_a, l, \
721721
EXPR(x), cmpopType::op, EXPRS(A2LIST(p.m_a, y)), 1)
722722

723-
char *get_raw_string(Allocator &al, std::string s) {
724-
std::string x;
725-
for (size_t idx=0; idx < s.size(); idx++) {
726-
if (s[idx] == '\n') {
727-
x += "\\n";
728-
} else if (s[idx] == '\t') {
729-
x += "\\t";
730-
} else if (s[idx] == '\b') {
731-
x += "\\b";
732-
} else if (s[idx] == '\v') {
733-
x += "\\v";
734-
} else if (s[idx] == '\\' && (s[idx+1] == 'n' || s[idx+1] == 'N'
735-
|| s[idx+1] == 't' || s[idx+1] == 'b' || s[idx+1] == 'v')) {
736-
x += "\\\\";
737-
x += s[idx+1];
738-
idx++;
739-
} else if (s[idx] == '\\') {
740-
x += "\\\\";
741-
} else if (s[idx] == '"') {
742-
x += "\\\"";
743-
} else {
744-
x += s[idx];
745-
}
746-
}
747-
return LCompilers::s2c(al, x);
748-
}
749-
750-
char* escape_string(Allocator &al, LCompilers::Str &s) {
751-
std::string x;
752-
for (size_t idx=0; idx < s.size(); idx++) {
753-
if (s.p[idx] == '\n') {
754-
x += "\\n";
755-
} else if (s.p[idx] == '\\' && s.p[idx+1] == '\n') {
756-
idx++;
757-
} else if (s.p[idx] == '\\' && s.p[idx+1] == '\\') {
758-
x += "\\\\";
759-
idx++;
760-
} else if (s.p[idx] == '\\' && s.p[idx+1] == '\'') {
761-
x += '\'';
762-
idx++;
763-
} else if (s.p[idx-1] != '\\' && s.p[idx] == '"') {
764-
x += "\\\"";
765-
} else {
766-
x += s.p[idx];
767-
}
768-
}
769-
return LCompilers::s2c(al, x);
770-
}
771-
772723
static inline ast_t* concat_string(Allocator &al, Location &l,
773724
expr_t *string, std::string str, expr_t *string_literal) {
774725
std::string str1 = "";
@@ -847,8 +798,8 @@ static inline ast_t* concat_string(Allocator &al, Location &l,
847798
x.c_str(p.m_a), expr_contextType::Load)
848799
// `x.int_n` is of type BigInt but we store the int64_t directly in AST
849800
#define INTEGER(x, l) make_ConstantInt_t(p.m_a, l, x, nullptr)
850-
#define STRING1(x, l) make_ConstantStr_t(p.m_a, l, escape_string(p.m_a, x), nullptr)
851-
#define STRING2(x, y, l) concat_string(p.m_a, l, EXPR(x), escape_string(p.m_a, y), nullptr)
801+
#define STRING1(x, l) make_ConstantStr_t(p.m_a, l, unescape_string(p.m_a, x), nullptr)
802+
#define STRING2(x, y, l) concat_string(p.m_a, l, EXPR(x), unescape_string(p.m_a, y), nullptr)
852803
#define STRING3(id, x, l) PREFIX_STRING(p.m_a, l, name2char(id), x.c_str(p.m_a))
853804
#define STRING4(x, s, l) concat_string(p.m_a, l, EXPR(x), "", EXPR(s))
854805
#define FLOAT(x, l) make_ConstantFloat_t(p.m_a, l, x, nullptr)
@@ -900,9 +851,6 @@ static inline ast_t *PREFIX_STRING(Allocator &al, Location &l, char *prefix, cha
900851
for (size_t i = 0; i < strs.size(); i++) {
901852
if (strs[i][0] == '"') {
902853
strs[i] = strs[i].substr(1, strs[i].length() - 2);
903-
if (strcmp(prefix, "fr") == 0 || strcmp(prefix, "rf") == 0) {
904-
strs[i] = std::string(get_raw_string(al, strs[i]));
905-
}
906854
tmp = make_ConstantStr_t(al, l, LCompilers::s2c(al, strs[i]), nullptr);
907855
exprs.push_back(al, down_cast<expr_t>(tmp));
908856
} else {
@@ -914,18 +862,16 @@ static inline ast_t *PREFIX_STRING(Allocator &al, Location &l, char *prefix, cha
914862
}
915863
tmp = make_JoinedStr_t(al, l, exprs.p, exprs.size());
916864
} else if (strcmp(prefix, "b") == 0) {
917-
std::string str_1 = std::string(s);
918-
LCompilers::Str str_2;
919-
str_2.from_str_view(str_1);
920-
str_1 = escape_string(al, str_2);
921-
str_1 = "b'" + str_1 + "'";
922-
tmp = make_ConstantBytes_t(al, l, LCompilers::s2c(al, str_1), nullptr);
923-
} else if (strcmp(prefix, "br") == 0|| strcmp(prefix, "rb") == 0) {
924-
std::string str = std::string(get_raw_string(al, std::string(s)));
865+
LCompilers::Str s_;
866+
s_.from_str(al, std::string(s));
867+
std::string str = std::string(unescape_string(al, s_));
868+
str = "b'" + str + "'";
869+
tmp = make_ConstantBytes_t(al, l, LCompilers::s2c(al, str), nullptr);
870+
} else if ( strcmp(prefix, "br") == 0 || strcmp(prefix, "rb") == 0) {
871+
std::string str = std::string(s);
925872
str = "b'" + str + "'";
926873
tmp = make_ConstantBytes_t(al, l, LCompilers::s2c(al, str), nullptr);
927874
} else if (strcmp(prefix, "r") == 0 ) {
928-
s = get_raw_string(al, std::string(s));
929875
tmp = make_ConstantStr_t(al, l, s, nullptr);
930876
} else if (strcmp(prefix, "u") == 0 ) {
931877
tmp = make_ConstantStr_t(al, l, s, LCompilers::s2c(al, "u"));

tests/parser/string3.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
"\n"
2+
"\r"
3+
"\t"
4+
"'"
5+
"\""
6+
"\\"
7+
"\n\r\t'\"\\"
8+
'\n'
9+
'\r'
10+
'\t'
11+
'\''
12+
'"'
13+
'\\'
14+
'\n\r\t\'"\\'
15+
16+
"hi\n"
17+
"hi\r\n\t"
18+
"1234\n\t\\jdlasf\t"
19+
'Hi this is a "test case".'
20+
21+
"""
22+
This is \n
23+
abcd123
24+
This is \r
25+
efgh
26+
"""
27+
28+
"""\
29+
This is \n
30+
"""
31+
32+
"hi\
33+
dsjfklad"

tests/reference/asr-test_end_sep_keywords-49ea13f.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
"outfile": null,
77
"outfile_hash": null,
88
"stdout": "asr-test_end_sep_keywords-49ea13f.stdout",
9-
"stdout_hash": "abd3d56db58711bac46b8548acd917a01d4d1671dbd2898906f04c19",
9+
"stdout_hash": "fdc9aacfe26de399fedb9c7de6d585f7068fe51c2ea5bce7f3d42827",
1010
"stderr": null,
1111
"stderr_hash": null,
1212
"returncode": 0

0 commit comments

Comments
 (0)