Skip to content

Commit 45526bd

Browse files
committed
Refactor CSVDialect, CSVWriter, CSVReader, CSVWriterBuiltins for performance and readibility.
Refactorings include: - Move toString() call on StringBuilder behind TruffleBoundary in CSVWriterBuiltins - Store line terminator code points and perform checks if a code point is contained in line terminator directly - Simplify CSVWriter joining fields
1 parent 03cad50 commit 45526bd

File tree

4 files changed

+43
-33
lines changed

4 files changed

+43
-33
lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/csv/CSVDialect.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
package com.oracle.graal.python.builtins.modules.csv;
22

3-
import com.oracle.graal.python.builtins.objects.object.PythonBuiltinObject;
4-
import com.oracle.truffle.api.object.Shape;
5-
63
import static com.oracle.graal.python.builtins.modules.csv.CSVModuleBuiltins.NOT_SET;
74
import static com.oracle.graal.python.builtins.modules.csv.CSVModuleBuiltins.NOT_SET_CODEPOINT;
85

6+
import com.oracle.graal.python.builtins.objects.object.PythonBuiltinObject;
7+
import com.oracle.truffle.api.object.Shape;
8+
99
public final class CSVDialect extends PythonBuiltinObject {
1010
String delimiter; /* field separator */
1111
boolean doubleQuote; /* is " represented by ""? */
@@ -19,6 +19,10 @@ public final class CSVDialect extends PythonBuiltinObject {
1919
int delimiterCodePoint; /* code point representation for handling utf-32 delimiters */
2020
int escapeCharCodePoint; /* code point representation for handling utf-32 escape chars */
2121
int quoteCharCodePoint; /* code point representation for handling utf-32 quote chars */
22+
int[] lineTerminatorCodePoints; /*
23+
* code point representation for handling utf-32 chars in line
24+
* terminator
25+
*/
2226

2327
public CSVDialect(Object cls, Shape instanceShape) {
2428
super(cls, instanceShape);
@@ -40,5 +44,6 @@ public CSVDialect(Object cls, Shape instanceShape, String delimiter, boolean dou
4044
this.delimiterCodePoint = this.delimiter == NOT_SET ? NOT_SET_CODEPOINT : this.delimiter.codePointAt(0);
4145
this.escapeCharCodePoint = this.escapeChar == NOT_SET ? NOT_SET_CODEPOINT : this.escapeChar.codePointAt(0);
4246
this.quoteCharCodePoint = quoteChar.codePointAt(0); // quote char cannot be NOT_SET
47+
this.lineTerminatorCodePoints = this.lineTerminator.codePoints().toArray();
4348
}
4449
}

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/csv/CSVReader.java

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,18 @@
11
package com.oracle.graal.python.builtins.modules.csv;
22

3+
import static com.oracle.graal.python.builtins.modules.csv.CSVReader.ReaderState.AFTER_ESCAPED_CRNL;
4+
import static com.oracle.graal.python.builtins.modules.csv.CSVReader.ReaderState.EAT_CRNL;
5+
import static com.oracle.graal.python.builtins.modules.csv.CSVReader.ReaderState.ESCAPED_CHAR;
6+
import static com.oracle.graal.python.builtins.modules.csv.CSVReader.ReaderState.ESCAPE_IN_QUOTED_FIELD;
7+
import static com.oracle.graal.python.builtins.modules.csv.CSVReader.ReaderState.IN_FIELD;
8+
import static com.oracle.graal.python.builtins.modules.csv.CSVReader.ReaderState.IN_QUOTED_FIELD;
9+
import static com.oracle.graal.python.builtins.modules.csv.CSVReader.ReaderState.START_FIELD;
10+
import static com.oracle.graal.python.builtins.modules.csv.CSVReader.ReaderState.START_RECORD;
11+
import static com.oracle.graal.python.builtins.modules.csv.QuoteStyle.QUOTE_NONE;
12+
import static com.oracle.graal.python.builtins.modules.csv.QuoteStyle.QUOTE_NONNUMERIC;
13+
14+
import java.util.ArrayList;
15+
316
import com.oracle.graal.python.builtins.PythonBuiltinClassType;
417
import com.oracle.graal.python.builtins.objects.object.PythonBuiltinObject;
518
import com.oracle.graal.python.lib.PyNumberFloatNode;
@@ -16,19 +29,6 @@
1629
import com.oracle.truffle.api.exception.AbstractTruffleException;
1730
import com.oracle.truffle.api.object.Shape;
1831

19-
import java.util.ArrayList;
20-
21-
import static com.oracle.graal.python.builtins.modules.csv.CSVReader.ReaderState.AFTER_ESCAPED_CRNL;
22-
import static com.oracle.graal.python.builtins.modules.csv.CSVReader.ReaderState.EAT_CRNL;
23-
import static com.oracle.graal.python.builtins.modules.csv.CSVReader.ReaderState.ESCAPED_CHAR;
24-
import static com.oracle.graal.python.builtins.modules.csv.CSVReader.ReaderState.ESCAPE_IN_QUOTED_FIELD;
25-
import static com.oracle.graal.python.builtins.modules.csv.CSVReader.ReaderState.IN_FIELD;
26-
import static com.oracle.graal.python.builtins.modules.csv.CSVReader.ReaderState.IN_QUOTED_FIELD;
27-
import static com.oracle.graal.python.builtins.modules.csv.CSVReader.ReaderState.START_FIELD;
28-
import static com.oracle.graal.python.builtins.modules.csv.CSVReader.ReaderState.START_RECORD;
29-
import static com.oracle.graal.python.builtins.modules.csv.QuoteStyle.QUOTE_NONE;
30-
import static com.oracle.graal.python.builtins.modules.csv.QuoteStyle.QUOTE_NONNUMERIC;
31-
3232
public final class CSVReader extends PythonBuiltinObject {
3333

3434
private static final int EOL = -2;
@@ -94,7 +94,7 @@ Object parseIterableInput() {
9494
throw PRaiseNode.getUncached().raise(PythonBuiltinClassType.CSVError, ErrorMessages.UNEXPECTED_END_OF_DATA);
9595
} else {
9696
try {
97-
this.parseSaveField();
97+
parseSaveField();
9898
} catch (AbstractTruffleException ignored) {
9999
throw e.getExceptionForReraise();
100100
}
@@ -104,15 +104,13 @@ Object parseIterableInput() {
104104
throw PRaiseNode.getUncached().raise(PythonBuiltinClassType.StopIteration);
105105
}
106106

107-
108107
String line;
109108
try {
110109
line = CastToJavaStringNode.getUncached().execute(lineObj);
111110
} catch (CannotCastException e) {
112111
throw PRaiseNode.getUncached().raise(PythonBuiltinClassType.CSVError, ErrorMessages.WRONG_ITERATOR_RETURN_TYPE, GetClassNode.getUncached().execute(lineObj));
113112
}
114113

115-
116114
//TODO: Implement PyUnicode_Check Node? => how do we handle the possibility of bytes?
117115
// PyPy: if isinstance(line, str) and '\0' in line or isinstance(line, bytes) and line.index(0) >=0:
118116
// raise Error("line contains NULL byte")

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/csv/CSVWriter.java

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -83,22 +83,21 @@ void joinField(Object field) {
8383
}
8484
}
8585

86-
boolean needsQuotes(Object field) {
86+
boolean needsQuotes(String field) {
8787
if (field == null)
8888
return false;
8989

9090
boolean needsQuotes = false;
91-
String fieldStr = field.toString();
92-
final int strLen = fieldStr.length();
91+
final int strLen = field.length();
9392

9493
for (int offset = 0; offset < strLen;) {
9594
boolean wantEscape = false;
9695

97-
final int c = fieldStr.codePointAt(offset);
96+
final int c = field.codePointAt(offset);
9897
if (c == dialect.delimiterCodePoint ||
9998
c == dialect.escapeCharCodePoint ||
10099
c == dialect.quoteCharCodePoint ||
101-
dialect.lineTerminator.codePoints().anyMatch(cp -> cp == c)) {
100+
containsCodePoint(dialect.lineTerminatorCodePoints, c)) {
102101

103102
if (dialect.quoting == QUOTE_NONE ||
104103
c == dialect.quoteCharCodePoint && !dialect.doubleQuote ||
@@ -117,7 +116,7 @@ boolean needsQuotes(Object field) {
117116
return needsQuotes;
118117
}
119118

120-
void joinAppend(Object field, boolean quoted) {
119+
void joinAppend(String field, boolean quoted) {
121120
CSVDialect dialect = this.dialect;
122121

123122
/* If we don't already know that the field must be quoted due to
@@ -141,21 +140,20 @@ void joinAppend(Object field, boolean quoted) {
141140
/* Copy field data and add escape chars as needed */
142141
/* If field is null just pass over */
143142
if (field != null) {
144-
String fieldStr = field.toString();
145-
int strLen = fieldStr.length();
143+
int strLen = field.length();
146144

147145
/* Python supports utf-32 characters, as Java characters are utf-16 only,
148146
* we have to work with code points instead. */
149147
for (int offset = 0; offset < strLen; ) {
150148

151149
boolean wantEscape = false;
152150

153-
final int c = fieldStr.codePointAt(offset);
151+
final int c = field.codePointAt(offset);
154152

155153
if (c == dialect.delimiterCodePoint ||
156154
c == dialect.escapeCharCodePoint ||
157155
c == dialect.quoteCharCodePoint ||
158-
dialect.lineTerminator.codePoints().anyMatch(cp -> cp == c)) {
156+
containsCodePoint(dialect.lineTerminatorCodePoints, c)) {
159157

160158
if (dialect.quoting == QUOTE_NONE) {
161159
wantEscape = true;
@@ -194,4 +192,12 @@ void joinAppendLineterminator() {
194192
this.rec.append(this.dialect.lineTerminator);
195193
}
196194

195+
boolean containsCodePoint(int[] codePoints, int codePoint) {
196+
for (int cp : codePoints) {
197+
if (cp == codePoint)
198+
return true;
199+
}
200+
return false;
201+
}
202+
197203
}

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/csv/CSVWriterBuiltins.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package com.oracle.graal.python.builtins.modules.csv;
22

3+
import java.util.List;
4+
35
import com.oracle.graal.python.PythonLanguage;
46
import com.oracle.graal.python.builtins.Builtin;
57
import com.oracle.graal.python.builtins.CoreFunctions;
@@ -17,14 +19,13 @@
1719
import com.oracle.graal.python.nodes.object.IsBuiltinClassProfile;
1820
import com.oracle.graal.python.runtime.ExecutionContext.IndirectCallContext;
1921
import com.oracle.graal.python.runtime.exception.PException;
22+
import com.oracle.graal.python.util.PythonUtils;
2023
import com.oracle.truffle.api.dsl.Cached;
2124
import com.oracle.truffle.api.dsl.GenerateNodeFactory;
2225
import com.oracle.truffle.api.dsl.NodeFactory;
2326
import com.oracle.truffle.api.dsl.Specialization;
2427
import com.oracle.truffle.api.frame.VirtualFrame;
2528

26-
import java.util.List;
27-
2829
@CoreFunctions(extendClasses = PythonBuiltinClassType.CSVWriter)
2930
public class CSVWriterBuiltins extends PythonBuiltins {
3031

@@ -43,7 +44,7 @@ Object doIt(VirtualFrame frame, CSVWriter self, Object seq,
4344
@Cached GetClassNode getClass,
4445
@Cached IsBuiltinClassProfile errorProfile,
4546
@Cached CallUnaryMethodNode callNode) {
46-
Object iter, field;
47+
Object iter;
4748

4849
try {
4950
iter = getIter.execute(frame, seq);
@@ -61,7 +62,7 @@ Object doIt(VirtualFrame frame, CSVWriter self, Object seq,
6162
IndirectCallContext.exit(frame, language, getContext(), state);
6263
}
6364

64-
return callNode.executeObject(frame, self.write, self.rec.toString());
65+
return callNode.executeObject(frame, self.write, PythonUtils.sbToString(self.rec));
6566
}
6667
}
6768

0 commit comments

Comments
 (0)