12
12
// Converting C++ kj arrays, strings, etc to Rust:
13
13
// - kjObject.as<Rust>() - creates zero-copy read-only Rust view
14
14
// - kjObject.as<RustMutable>() - creates zero-copy mutable Rust view
15
- // - kjObject.as<RustCopy>() - creates owned Rust copy
15
+ // - kjObject.as<RustCopy>() - creates owned Rust copy (safe byte arrays)
16
+ // - kjObject.as<RustUncheckedUtf8>() - creates Rust string without validation (assumes valid UTF-8)
17
+ // - kjObject.as<RustCopyUncheckedUtf8>() - creates owned Rust string (assumes valid UTF-8)
16
18
//
17
19
// Converting Rust to C++ kj objects:
18
20
// - from<Rust>(rustObject) - creates zero-copy C++ view
19
21
// - from<RustCopy>(rustObject) - creates owned C++ copy
20
- // - kj::str(rustString) - automatic conversion (via KJ_STRINGIFY)
21
- // - kj::hashCode(rustString) - automatic hash computation (via KJ_HASHCODE)
22
+ // - kj::str(rustString/rustSlice/rustVec) - automatic conversion (via KJ_STRINGIFY)
23
+ // - kj::hashCode(rustString/rustSlice/rustVec) - automatic hash computation (via KJ_HASHCODE)
22
24
//
23
25
// ============================================================================
24
- // CONVERSION FUNCTIONS
26
+ // ARRAY/COLLECTION CONVERSIONS
25
27
// ============================================================================
26
28
//
27
29
// Zero-copy conversions from Rust to C++:
28
30
// - from<Rust>(rust::Vec<T>) -> kj::ArrayPtr<const T>
29
31
// - from<Rust>(rust::Slice<T>) -> kj::ArrayPtr<T>
30
- // - from<Rust>(rust::String) -> kj::ArrayPtr<const char>
31
- // - from<Rust>(rust::str) -> kj::ArrayPtr<const char>
32
- //
33
- // Owned conversions from Rust to C++:
34
- // - from<RustCopy>(rust::Slice<rust::str>) -> kj::Array<kj::String>
35
- // - from<RustCopy>(rust::Vec<rust::String>) -> kj::Array<kj::String>
36
32
//
37
33
// Zero-copy conversions from C++ to Rust (read-only):
38
34
// - kjArray.as<Rust>() -> rust::Slice<const T>
39
- // - kjString.as<Rust>() -> rust::String
40
- // - kjStringPtr.as<Rust>() -> rust::str
41
- // - kjConstString.as<Rust>() -> rust::str
42
35
//
43
36
// Zero-copy conversions from C++ to Rust (mutable):
44
37
// - kjArray.as<RustMutable>() -> rust::Slice<T>
45
38
// - kjArrayPtr.as<RustMutable>() -> rust::Slice<T>
46
39
//
47
40
// Owned conversions from C++ to Rust (copying):
48
- // - kjStringPtr.as<RustCopy>() -> rust::String
49
- // - kjConstString.as<RustCopy>() -> rust::String
50
41
// - kjArrayPtr.as<RustCopy>() -> rust::Vec<T>
51
42
//
52
- // Automatic conversions (via ADL):
43
+ // ============================================================================
44
+ // STRING CONVERSIONS
45
+ // ============================================================================
46
+ //
47
+ // IMPORTANT: Rust strings require valid UTF-8, but KJ strings don't!
48
+ // This library provides both SAFE and UNSAFE string conversion options.
49
+ //
50
+ // --- RUST TO C++ STRING CONVERSIONS ---
51
+ //
52
+ // Zero-copy (always safe):
53
+ // - from<Rust>(rust::String) -> kj::ArrayPtr<const char>
54
+ // - from<Rust>(rust::str) -> kj::ArrayPtr<const char>
55
+ //
56
+ // Owned copies (always safe):
57
+ // - from<RustCopy>(rust::Slice<rust::str>) -> kj::Array<kj::String>
58
+ // - from<RustCopy>(rust::Vec<rust::String>) -> kj::Array<kj::String>
59
+ // - kj::str(rust::str) -> kj::String
60
+ // - kj::str(rust::String) -> kj::String
61
+ //
62
+ // --- C++ TO RUST STRING CONVERSIONS (SAFE) ---
63
+ //
64
+ // Returns raw bytes - use std::str::from_utf8() or from_utf8_lossy() on Rust side:
65
+ // - kjString.as<Rust>() -> rust::Slice<const char> // Safe for non-UTF-8 data
66
+ // - kjStringPtr.as<Rust>() -> rust::Slice<const char> // Safe for non-UTF-8 data
67
+ // - kjConstString.as<Rust>() -> rust::Slice<const char> // Safe for non-UTF-8 data
68
+ //
69
+ // Returns owned bytes - use std::str::from_utf8() or from_utf8_lossy() on Rust side:
70
+ // - kjStringPtr.as<RustCopy>() -> rust::Vec<char> // Safe byte array
71
+ // - kjConstString.as<RustCopy>() -> rust::Vec<char> // Safe byte array
72
+ //
73
+ // --- C++ TO RUST STRING CONVERSIONS (UNSAFE) ---
74
+ //
75
+ // ⚠️ WARNING: These assume valid UTF-8; Rust code may panic or exhibit undefined behavior
76
+ // if the KJ string contains invalid UTF-8 bytes!
77
+ //
78
+ // Zero-copy (UNSAFE - assumes valid UTF-8):
79
+ // - kjString.as<RustUncheckedUtf8>() -> rust::String // NOTE(review): rust::String is an owning type, so this likely copies - confirm
80
+ // - kjStringPtr.as<RustUncheckedUtf8>() -> rust::Str
81
+ // - kjConstString.as<RustUncheckedUtf8>() -> rust::Str
82
+ //
83
+ // Owned copies (UNSAFE - assumes valid UTF-8):
84
+ // - kjStringPtr.as<RustCopyUncheckedUtf8>() -> rust::String
85
+ // - kjConstString.as<RustCopyUncheckedUtf8>() -> rust::String
86
+ //
87
+ // --- AUTOMATIC STRING CONVERSIONS ---
88
+ //
89
+ // These work with kj::str() and kj::hashCode() automatically:
53
90
// - kj::str(rust::String) - uses KJ_STRINGIFY for seamless string conversion
91
+ // - kj::str(rust::Slice<const char>) - uses KJ_STRINGIFY for slice conversion
92
+ // - kj::str(rust::Vec<char>) - uses KJ_STRINGIFY for vector conversion
54
93
// - kj::hashCode(rust::String) - uses KJ_HASHCODE for hash computation
94
+ // - kj::hashCode(rust::Slice<const char>) - uses KJ_HASHCODE for slice hashing
95
+ // - kj::hashCode(rust::Vec<char>) - uses KJ_HASHCODE for vector hashing
55
96
//
56
97
// ============================================================================
57
98
// EXAMPLES
62
103
// // Convert Rust to C++:
63
104
// kj::ArrayPtr<const int> cppView = from<Rust>(rustVec);
64
105
//
65
- // // Convert C++ to Rust (read-only):
66
- // rust::Slice<const int> rustView = cppArray.as<Rust>();
106
+ // // Convert C++ to Rust (read-only, safe):
107
+ // rust::Slice<const char> rustBytes = kjString.as<Rust>();
108
+ // // Then in Rust: std::str::from_utf8(&rustBytes) or from_utf8_lossy(&rustBytes)
67
109
//
68
110
// // Convert C++ to Rust (mutable):
69
111
// rust::Slice<int> rustMutableView = cppArray.as<RustMutable>();
70
112
//
71
- // // Convert C++ to Rust (copying):
72
- // rust::String rustOwnedStr = cppStr.as<RustCopy>();
113
+ // // Convert C++ to Rust (copying, safe):
114
+ // rust::Vec<char> rustOwnedBytes = kjStr.as<RustCopy>();
115
+ //
116
+ // // Convert C++ to Rust (unsafe, assumes valid UTF-8):
117
+ // rust::String rustStr = kjStr.as<RustUncheckedUtf8>(); // UNSAFE!
73
118
//
74
119
// // Automatic string conversion:
75
120
// kj::String cppStr = kj::str(rustStr); // via KJ_STRINGIFY
121
+ // kj::String cppStr2 = kj::str(rustSlice); // also works with slices/vecs
76
122
//
77
123
78
124
#include < rust/cxx.h>
@@ -101,6 +147,16 @@ inline auto KJ_STRINGIFY(const ::rust::str& str) {
101
147
return kj::ArrayPtr<const char >(str.data (), str.size ());
102
148
}
103
149
150
+ // / Converts rust::Slice<const char> to kj::ArrayPtr - called by kj::str(rustSlice)
151
+ inline auto KJ_STRINGIFY (const ::rust::Slice<const char >& str) {
152
+ return kj::ArrayPtr<const char >(str.data (), str.size ());
153
+ }
154
+
155
+ // / Converts rust::Vec<char> to kj::ArrayPtr - called by kj::str(rustVec)
156
+ inline auto KJ_STRINGIFY (const ::rust::Vec<char >& str) {
157
+ return kj::ArrayPtr<const char >(str.data (), str.size ());
158
+ }
159
+
104
160
// / Hash code for rust::String - called by kj::hashCode(rustString)
105
161
inline auto KJ_HASHCODE (const ::rust::String& str) {
106
162
return kj::hashCode (kj::toCharSequence (str));
@@ -111,6 +167,16 @@ inline auto KJ_HASHCODE(const ::rust::str& str) {
111
167
return kj::hashCode (kj::toCharSequence (str));
112
168
}
113
169
170
+ // / Hash code for rust::Slice<const char> - called by kj::hashCode(rustSlice)
171
+ inline auto KJ_HASHCODE (const ::rust::Slice<const char >& str) {
172
+ return kj::hashCode (kj::toCharSequence (str));
173
+ }
174
+
175
+ // / Hash code for rust::Vec<char> - called by kj::hashCode(rustVec)
176
+ inline auto KJ_HASHCODE (const ::rust::Vec<char >& str) {
177
+ return kj::hashCode (kj::toCharSequence (str));
178
+ }
179
+
114
180
} // namespace rust
115
181
116
182
namespace kj_rs {
@@ -138,18 +204,18 @@ struct Rust {
138
204
}
139
205
140
206
// / kjString.as<Rust>() - via Rust::from(&kjString)
141
- static ::rust::String from (const kj::String* str) {
142
- return ::rust::String (str->begin (), str->size ());
207
+ static ::rust::Slice< const char > from (const kj::String* str) {
208
+ return ::rust::Slice (str->begin (), str->size ());
143
209
}
144
210
145
211
// / kjStringPtr.as<Rust>() - via Rust::from(&kjStringPtr)
146
- static ::rust::Str from (const kj::StringPtr* str) {
147
- return ::rust::Str (str->begin (), str->size ());
212
+ static ::rust::Slice< const char > from (const kj::StringPtr* str) {
213
+ return ::rust::Slice (str->begin (), str->size ());
148
214
}
149
215
150
216
// / kjConstString.as<Rust>() - via Rust::from(&kjConstString)
151
- static ::rust::Str from (const kj::ConstString* str) {
152
- return ::rust::Str (str->begin (), str->size ());
217
+ static ::rust::Slice< const char > from (const kj::ConstString* str) {
218
+ return ::rust::Slice (str->begin (), str->size ());
153
219
}
154
220
155
221
// into() methods for from<Rust>(rustObject) - converting Rust to KJ
@@ -179,16 +245,6 @@ struct Rust {
179
245
180
246
// / Owned Rust copies: kjObject.as<RustCopy>() and from<RustCopy>(kjObject)
181
247
struct RustCopy {
182
- // / kjStringPtr.as<RustCopy>() - via RustCopy::from(&kjStringPtr)
183
- static ::rust::String from (const kj::StringPtr* str) {
184
- return ::rust::String (str->begin (), str->size ());
185
- }
186
-
187
- // / kjConstString.as<RustCopy>() - via RustCopy::from(&kjConstString)
188
- static ::rust::String from (const kj::ConstString* str) {
189
- return ::rust::String (str->begin (), str->size ());
190
- }
191
-
192
248
// / kjArrayPtr.as<RustCopy>() - via RustCopy::from(&kjArrayPtr)
193
249
template <typename T>
194
250
static ::rust::Vec<T> from (kj::ArrayPtr<const T>* arr) {
@@ -200,6 +256,19 @@ struct RustCopy {
200
256
return result;
201
257
}
202
258
259
+ // / kjStringPtr.as<RustCopy>() - via RustCopy::from(&kjStringPtr)
260
+ static ::rust::Vec<char > from (const kj::StringPtr* str) {
261
+ auto ptr = str->asArray ();
262
+ return from (&ptr);
263
+ }
264
+
265
+ // / kjConstString.as<RustCopy>() - via RustCopy::from(&kjConstString)
266
+ static ::rust::Vec<char > from (const kj::ConstString* str) {
267
+ auto ptr = str->asArray ();
268
+ return from (&ptr);
269
+ }
270
+
271
+
203
272
// / from<RustCopy>(rustSliceOfStrs) - Copy slice of strs to null-terminated KJ strings
204
273
static kj::Array<kj::String> into (::rust::Slice<::rust::str> slice) {
205
274
auto res = kj::heapArrayBuilder<kj::String>(slice.size ());
@@ -234,4 +303,39 @@ struct RustMutable {
234
303
}
235
304
};
236
305
237
- } // namespace kj_rs
306
+ // Rust strings require valid utf8 content, which is not enforced by `kj::String`.
307
+ // Passing invalid utf8 to `rust::String` could result in panics and other unexpected behaviour.
308
+ // Use this struct to convert `kj::String` to `rust::String` without checking for valid utf8
309
+ // when you are confident about the content of the string or do not care about the consequences.
310
+ // It is safer to convert strings to byte slices and use `from_utf8_lossy` or similar on the Rust side.
311
+ struct RustUncheckedUtf8 {
312
+ // / kjString.as<RustUncheckedUtf8>() - via RustUncheckedUtf8::from(&kjString)
313
+ static ::rust::String from (const kj::String* str) {
314
+ return ::rust::String (str->begin (), str->size ());
315
+ }
316
+
317
+ // / kjStringPtr.as<RustUncheckedUtf8>() - via RustUncheckedUtf8::from(&kjStringPtr)
318
+ static ::rust::Str from (const kj::StringPtr* str) {
319
+ return ::rust::Str (str->begin (), str->size ());
320
+ }
321
+
322
+ // / kjConstString.as<RustUncheckedUtf8>() - via RustUncheckedUtf8::from(&kjConstString)
323
+ static ::rust::Str from (const kj::ConstString* str) {
324
+ return ::rust::Str (str->begin (), str->size ());
325
+ }
326
+ };
327
+
328
+ // Copying conversion for string types. See comment for `RustUncheckedUtf8` for details.
329
+ struct RustCopyUncheckedUtf8 {
330
+ // / kjStringPtr.as<RustCopyUncheckedUtf8>() - via RustCopyUncheckedUtf8::from(&kjStringPtr)
331
+ static ::rust::String from (const kj::StringPtr* str) {
332
+ return ::rust::String (str->begin (), str->size ());
333
+ }
334
+
335
+ // / kjConstString.as<RustCopyUncheckedUtf8>() - via RustCopyUncheckedUtf8::from(&kjConstString)
336
+ static ::rust::String from (const kj::ConstString* str) {
337
+ return ::rust::String (str->begin (), str->size ());
338
+ }
339
+ };
340
+
341
+ } // namespace kj_rs
0 commit comments