Skip to content
This repository was archived by the owner on Jan 14, 2023. It is now read-only.

Commit f6d3acb

Browse files
Theosakamg authored and stonier committed
Add support of UTF-8!
For i18n (e.g. TTS). "A string must always contain UTF-8 encoded or 7-bit ASCII text." (https://developers.google.com/protocol-buffers/docs/proto#scalar) "unicode strings are currently not supported as a ROS data type. utf-8 should be used to be compatible with ROS string serialization." (http://wiki.ros.org/msg)
1 parent 0ffb811 commit f6d3acb

File tree

2 files changed

+70
-2
lines changed

2 files changed

+70
-2
lines changed

message_generation/src/main/java/org/ros/internal/message/field/PrimitiveFieldType.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
/**
3030
* @author [email protected] (Damon Kohler)
31+
* @author [email protected] (Mickael Gaillard)
3132
*/
3233
public enum PrimitiveFieldType implements FieldType {
3334

@@ -571,7 +572,7 @@ public int getSerializedSize() {
571572
@Override
572573
public <T> void serialize(T value, ChannelBuffer buffer) {
573574
Preconditions.checkArgument(value instanceof String);
574-
byte[] bytes = ((String) value).getBytes();
575+
byte[] bytes = ((String) value).getBytes(DEFAULT_CHARSET);
575576
buffer.writeInt(bytes.length);
576577
buffer.writeBytes(bytes);
577578
}
@@ -581,7 +582,7 @@ public <T> void serialize(T value, ChannelBuffer buffer) {
581582
public String deserialize(ChannelBuffer buffer) {
582583
int length = buffer.readInt();
583584
ByteBuffer stringBuffer = buffer.readSlice(length).toByteBuffer();
584-
return Charset.forName("US-ASCII").decode(stringBuffer).toString();
585+
return DEFAULT_CHARSET.decode(stringBuffer).toString();
585586
}
586587

587588
@SuppressWarnings("unchecked")
@@ -678,6 +679,7 @@ public String getJavaTypeName() {
678679
}
679680
};
680681

682+
private static final Charset DEFAULT_CHARSET = Charset.forName("UTF-8");
681683
private static final ImmutableSet<String> TYPE_NAMES;
682684

683685
static {

message_generation/src/test/java/org/ros/internal/message/RawMessageSerializationTest.java

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030

3131
/**
3232
* @author [email protected] (Damon Kohler)
33+
* @author [email protected] (Mickael Gaillard)
3334
*/
3435
public class RawMessageSerializationTest {
3536

@@ -138,6 +139,71 @@ public void testString() {
138139
rawMessage.setString("data", "Hello, ROS!");
139140
checkSerializeAndDeserialize(rawMessage);
140141
}
142+
143+
@Test
144+
public void testStringUTF8() {
145+
RawMessage rawMessage = messageFactory.newFromType("std_msgs/String");
146+
rawMessage.setString("data", "éêè €àáßëœ 文字化け");
147+
checkSerializeAndDeserialize(rawMessage);
148+
149+
// i18n test case
150+
// base on http://www.inter-locale.com/whitepaper/learn/learn-to-test.html
151+
152+
// Combining Marks and Accents test
153+
rawMessage.setString("data", "àéîōũ");
154+
checkSerializeAndDeserialize(rawMessage);
155+
156+
// DOS 860 test
157+
rawMessage.setString("data", "você nós mãe avô irmã criança");
158+
checkSerializeAndDeserialize(rawMessage);
159+
160+
// Windows-1252 test
161+
rawMessage.setString("data", "€ŒœŠš™©‰ƒ");
162+
checkSerializeAndDeserialize(rawMessage);
163+
164+
// Turkish test
165+
rawMessage.setString("data", "ışık bir İyi Günler");
166+
checkSerializeAndDeserialize(rawMessage);
167+
168+
// Dakuten and handakuten marks test
169+
rawMessage.setString("data", "がざばだぱか゛さ゛た゛は");
170+
checkSerializeAndDeserialize(rawMessage);
171+
172+
// Combining Grapheme Joiner character
173+
rawMessage.setString("data", "אִ͏ַ");
174+
checkSerializeAndDeserialize(rawMessage);
175+
176+
// Bidi with Latin test
177+
rawMessage.setString("data", "abcאבגדabc ");
178+
checkSerializeAndDeserialize(rawMessage);
179+
180+
rawMessage.setString("data", "אבגדabcאבגד");
181+
checkSerializeAndDeserialize(rawMessage);
182+
183+
rawMessage.setString("data", "אבגד012אבגד");
184+
checkSerializeAndDeserialize(rawMessage);
185+
186+
rawMessage.setString("data", "אבגד 012 012");
187+
checkSerializeAndDeserialize(rawMessage);
188+
189+
// Complex Scripts test
190+
rawMessage.setString("data", "สวัสดี");
191+
checkSerializeAndDeserialize(rawMessage);
192+
193+
rawMessage.setString("data", "டாஹ்கோ");
194+
checkSerializeAndDeserialize(rawMessage);
195+
196+
rawMessage.setString("data", "بِسْمِ اللّهِ الرَّحْمـَنِ الرَّحِيمِ");
197+
checkSerializeAndDeserialize(rawMessage);
198+
199+
// Numeric Shaping test
200+
rawMessage.setString("data", "عدد مارس ١٩٩٨");
201+
checkSerializeAndDeserialize(rawMessage);
202+
203+
// Common Scripts and Encodings test
204+
rawMessage.setString("data", "Слава Жанна Ювеналий Ярополк");
205+
checkSerializeAndDeserialize(rawMessage);
206+
}
141207

142208
@Test
143209
public void testTime() {

0 commit comments

Comments
 (0)