Skip to content

Commit 20eac37

Browse files
committed
Fix str.translate
1 parent 192a043 commit 20eac37

File tree

4 files changed

+82
-91
lines changed

4 files changed

+82
-91
lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/PString.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,16 @@ public static char charAt(String s, int i) {
271271
return s.charAt(i);
272272
}
273273

274+
@TruffleBoundary(allowInlining = true)
275+
public static int codePointAt(String s, int i) {
276+
return s.codePointAt(i);
277+
}
278+
279+
@TruffleBoundary(allowInlining = true)
280+
public static int charCount(int codePoint) {
281+
return Character.charCount(codePoint);
282+
}
283+
274284
@TruffleBoundary(allowInlining = true)
275285
public static int indexOf(String s, String sub, int fromIndex) {
276286
return s.indexOf(sub, fromIndex);

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringBuiltins.java

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -847,31 +847,27 @@ static String doGeneric(VirtualFrame frame, Object self, Object table,
847847
@Cached SpliceNode spliceNode) {
848848
String selfStr = castSelfNode.cast(self, ErrorMessages.REQUIRES_STR_OBJECT_BUT_RECEIVED_P, "translate", self);
849849

850-
char[] translatedChars = new char[selfStr.length()];
850+
StringBuilder sb = StringUtils.newStringBuilder(selfStr.length());
851851

852-
int offset = 0;
853-
for (int i = 0; i < selfStr.length(); i++) {
854-
char original = selfStr.charAt(i);
852+
for (int i = 0; i < selfStr.length();) {
853+
int original = PString.codePointAt(selfStr, i);
855854
Object translated = null;
856855
try {
857-
translated = getItemNode.execute(frame, table, (int) original);
856+
translated = getItemNode.execute(frame, table, original);
858857
} catch (PException e) {
859858
if (!isSubtypeNode.execute(null, plib.getLazyPythonClass(e.getExceptionObject()), PythonBuiltinClassType.LookupError)) {
860859
throw e;
861860
}
862861
}
863-
if (PGuards.isNone(translated)) {
864-
// untranslatable
865-
} else if (translated != null) {
866-
int oldlen = translatedChars.length;
867-
translatedChars = spliceNode.execute(translatedChars, i + offset, translated);
868-
offset += translatedChars.length - oldlen;
862+
if (translated != null) {
863+
spliceNode.execute(sb, translated);
869864
} else {
870-
translatedChars[i + offset] = original;
865+
StringUtils.appendCodePoint(sb, original);
871866
}
867+
i += PString.charCount(original);
872868
}
873869

874-
return new String(translatedChars);
870+
return StringUtils.toString(sb);
875871
}
876872
}
877873

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringNodes.java

Lines changed: 35 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,6 @@
4444
import static com.oracle.graal.python.runtime.exception.PythonErrorType.TypeError;
4545
import static com.oracle.graal.python.runtime.exception.PythonErrorType.ValueError;
4646

47-
import java.util.Arrays;
48-
4947
import com.oracle.graal.python.builtins.PythonBuiltinClassType;
5048
import com.oracle.graal.python.builtins.objects.PNone;
5149
import com.oracle.graal.python.builtins.objects.cext.CExtNodes.PCallCapiFunction;
@@ -282,20 +280,20 @@ static String doPSequence(VirtualFrame frame, String self, PSequence sequence,
282280
return "";
283281
}
284282

285-
StringBuilder sb = new StringBuilder();
283+
StringBuilder sb = StringUtils.newStringBuilder();
286284
int i = 0;
287285

288286
try {
289287
// manually peel first iteration
290288
Object item = getItemNode.execute(frame, storage, i);
291-
append(sb, castToJavaStringNode.cast(item, INVALID_SEQ_ITEM, i, item));
289+
StringUtils.append(sb, castToJavaStringNode.cast(item, INVALID_SEQ_ITEM, i, item));
292290

293291
for (i = 1; i < len; i++) {
294-
append(sb, self);
292+
StringUtils.append(sb, self);
295293
item = getItemNode.execute(frame, storage, i);
296-
append(sb, castToJavaStringNode.cast(item, INVALID_SEQ_ITEM, i, item));
294+
StringUtils.append(sb, castToJavaStringNode.cast(item, INVALID_SEQ_ITEM, i, item));
297295
}
298-
return toString(sb);
296+
return StringUtils.toString(sb);
299297
} catch (OutOfMemoryError e) {
300298
throw raise.raise(MemoryError);
301299
}
@@ -318,9 +316,9 @@ static String doGeneric(VirtualFrame frame, String string, Object iterable,
318316
throw raise.raise(PythonBuiltinClassType.TypeError, ErrorMessages.CAN_ONLY_JOIN_ITERABLE);
319317
}
320318
try {
321-
StringBuilder str = new StringBuilder();
319+
StringBuilder str = StringUtils.newStringBuilder();
322320
try {
323-
append(str, checkItem(nextNode.execute(frame, iterator), 0, castStrNode, raise));
321+
StringUtils.append(str, checkItem(nextNode.execute(frame, iterator), 0, castStrNode, raise));
324322
} catch (PException e) {
325323
e.expectStopIteration(errorProfile1);
326324
return "";
@@ -332,10 +330,10 @@ static String doGeneric(VirtualFrame frame, String string, Object iterable,
332330
value = nextNode.execute(frame, iterator);
333331
} catch (PException e) {
334332
e.expectStopIteration(errorProfile2);
335-
return toString(str);
333+
return StringUtils.toString(str);
336334
}
337-
append(str, string);
338-
append(str, checkItem(value, i++, castStrNode, raise));
335+
StringUtils.append(str, string);
336+
StringUtils.append(str, checkItem(value, i++, castStrNode, raise));
339337
}
340338
} catch (OutOfMemoryError e) {
341339
throw raise.raise(MemoryError);
@@ -350,106 +348,66 @@ private static String checkItem(Object item, int pos, CastToJavaStringNode castN
350348
}
351349
}
352350

353-
@TruffleBoundary(allowInlining = true)
354-
static StringBuilder append(StringBuilder sb, String o) {
355-
return sb.append(o);
356-
}
357-
358-
@TruffleBoundary(allowInlining = true)
359-
static String toString(StringBuilder sb) {
360-
return sb.toString();
361-
}
362-
363351
static boolean isExactlyListOrTuple(PythonObjectLibrary lib, IsBuiltinClassProfile tupleProfile, IsBuiltinClassProfile listProfile, PSequence sequence) {
364352
Object cls = lib.getLazyPythonClass(sequence);
365353
return tupleProfile.profileClass(cls, PythonBuiltinClassType.PTuple) || listProfile.profileClass(cls, PythonBuiltinClassType.PList);
366354
}
367355
}
368356

357+
@ImportStatic(PGuards.class)
369358
public abstract static class SpliceNode extends PNodeWithContext {
370359

371-
public abstract char[] execute(char[] translatedChars, int i, Object translated);
360+
public abstract void execute(StringBuilder sb, Object translated);
361+
362+
@Specialization(guards = "isNone(none)")
363+
@SuppressWarnings("unused")
364+
static void doNone(StringBuilder sb, PNone none) {
365+
}
372366

373367
@Specialization
374-
static char[] doInt(char[] translatedChars, int i, int translated,
375-
@Shared("raise") @Cached PRaiseNode raise,
376-
@Cached BranchProfile ovf) {
377-
try {
378-
translatedChars[i] = PInt.charValueExact(translated);
379-
return translatedChars;
380-
} catch (OverflowException e) {
381-
ovf.enter();
382-
throw raiseError(raise);
383-
}
368+
@TruffleBoundary(allowInlining = true)
369+
static void doInt(StringBuilder sb, int translated) {
370+
sb.appendCodePoint(translated);
384371
}
385372

386373
@Specialization
387-
static char[] doLong(char[] translatedChars, int i, long translated,
374+
static void doLong(StringBuilder sb, long translated,
388375
@Shared("raise") @Cached PRaiseNode raise,
389-
@Cached BranchProfile ovf) {
376+
@Shared("overflow") @Cached BranchProfile ovf) {
390377
try {
391-
translatedChars[i] = PInt.charValueExact(translated);
392-
return translatedChars;
378+
doInt(sb, PInt.intValueExact(translated));
393379
} catch (OverflowException e) {
394380
ovf.enter();
395381
throw raiseError(raise);
396382
}
397383
}
398384

399385
@Specialization
400-
static char[] doPInt(char[] translatedChars, int i, PInt translated,
386+
static void doPInt(StringBuilder sb, PInt translated,
401387
@Shared("raise") @Cached PRaiseNode raise,
402-
@Cached BranchProfile ovf) {
403-
double doubleValue = translated.doubleValue();
404-
char t = (char) doubleValue;
405-
if (t != doubleValue) {
388+
@Shared("overflow") @Cached BranchProfile ovf) {
389+
try {
390+
doInt(sb, translated.intValueExact());
391+
} catch (OverflowException e) {
406392
ovf.enter();
407393
throw raiseError(raise);
408394
}
409-
translatedChars[i] = t;
410-
return translatedChars;
411-
}
412-
413-
@Specialization(guards = "translated.length() == 1")
414-
@TruffleBoundary
415-
static char[] doStringChar(char[] translatedChars, int i, String translated) {
416-
translatedChars[i] = translated.charAt(0);
417-
return translatedChars;
418395
}
419396

420-
@Specialization(replaces = "doStringChar")
421-
@TruffleBoundary
422-
static char[] doString(char[] translatedChars, int i, String translated) {
423-
int transLen = translated.length();
424-
if (transLen == 1) {
425-
translatedChars[i] = translated.charAt(0);
426-
} else if (transLen == 0) {
427-
int len = translatedChars.length;
428-
return Arrays.copyOf(translatedChars, len - 1);
429-
} else {
430-
int len = translatedChars.length;
431-
char[] copy = Arrays.copyOf(translatedChars, len + transLen - 1);
432-
translated.getChars(0, transLen, copy, i);
433-
return copy;
434-
}
435-
return translatedChars;
397+
@Specialization
398+
@TruffleBoundary(allowInlining = true)
399+
static void doString(StringBuilder sb, String translated) {
400+
sb.append(translated);
436401
}
437402

438-
@Specialization
439-
static char[] doObject(char[] translatedChars, int i, Object translated,
403+
@Specialization(guards = {"!isInteger(translated)", "!isPInt(translated)", "!isNone(translated)"})
404+
static void doObject(StringBuilder sb, Object translated,
440405
@Shared("raise") @Cached PRaiseNode raise,
441-
@Cached BranchProfile ovf,
442406
@Cached CastToJavaStringNode castToJavaStringNode) {
443407

444-
if (translated instanceof Integer || translated instanceof Long) {
445-
return doLong(translatedChars, i, ((Number) translated).longValue(), raise, ovf);
446-
} else if (translated instanceof PInt) {
447-
return doPInt(translatedChars, i, (PInt) translated, raise, ovf);
448-
}
449-
450408
try {
451409
String translatedStr = castToJavaStringNode.execute(translated);
452-
return doString(translatedChars, i, translatedStr);
410+
doString(sb, translatedStr);
453411
} catch (CannotCastException e) {
454412
throw raise.raise(PythonBuiltinClassType.TypeError, ErrorMessages.CHARACTER_MAPPING_MUST_RETURN_INT_NONE_OR_STR);
455413
}

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringUtils.java

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,9 @@ public enum StripKind {
5656
BOTH
5757
}
5858

59-
/** corresponds to {@code unicodeobject.c:_Py_ascii_whitespace} */
59+
/**
60+
* corresponds to {@code unicodeobject.c:_Py_ascii_whitespace}
61+
*/
6062
private static final int[] ASCII_WHITESPACE = {
6163
0, 0, 0, 0, 0, 0, 0, 0,
6264
/* case 0x0009: * CHARACTER TABULATION */
@@ -290,4 +292,29 @@ private static boolean isAlnumICU(int codePoint) {
290292
int numericType = UCharacter.getIntPropertyValue(codePoint, UProperty.NUMERIC_TYPE);
291293
return numericType == UCharacter.NumericType.DECIMAL || numericType == UCharacter.NumericType.DIGIT || numericType == UCharacter.NumericType.NUMERIC;
292294
}
295+
296+
@TruffleBoundary(allowInlining = true)
297+
public static StringBuilder newStringBuilder() {
298+
return new StringBuilder();
299+
}
300+
301+
@TruffleBoundary(allowInlining = true)
302+
public static StringBuilder newStringBuilder(int initialCapacity) {
303+
return new StringBuilder(initialCapacity);
304+
}
305+
306+
@TruffleBoundary(allowInlining = true)
307+
public static void appendCodePoint(StringBuilder sb, int codePoint) {
308+
sb.appendCodePoint(codePoint);
309+
}
310+
311+
@TruffleBoundary(allowInlining = true)
312+
public static void append(StringBuilder sb, String str) {
313+
sb.append(str);
314+
}
315+
316+
@TruffleBoundary(allowInlining = true)
317+
public static String toString(StringBuilder sb) {
318+
return sb.toString();
319+
}
293320
}

0 commit comments

Comments
 (0)