Skip to content

Commit b12acfc

Browse files
ddobrev, tritao
authored and committed
Upgrade ANSI marshalling to UTF-8 marshalling
UTF-8 is backwards-compatible with, i.e. a strict superset of, ANSI, so we can safely marshal any ANSI string as UTF-8 and thus gain UTF-8 support for free. Also fixed a memory leak when passing strings to native code: nobody ever freed the memory allocated by the previously used Marshal.StringToHGlobalAnsi/StringToHGlobalUni. Signed-off-by: Dimitar Dobrev <[email protected]>
1 parent 573272e commit b12acfc

File tree

8 files changed

+134
-70
lines changed

8 files changed

+134
-70
lines changed

src/Generator/Generators/CSharp/CSharpSources.cs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1068,6 +1068,11 @@ private void GenerateIndexerSetter(Function function)
10681068
marshal.Context.ArgumentPrefix}{marshal.Context.Return};");
10691069
}
10701070
}
1071+
if (paramMarshal.HasUsingBlock)
1072+
UnindentAndWriteCloseBrace();
1073+
1074+
if (ctx.HasCodeBlock)
1075+
UnindentAndWriteCloseBrace();
10711076
}
10721077

10731078
private void GeneratePropertyGetter<T>(T decl, Class @class,
@@ -1802,6 +1807,9 @@ private void GenerateVTableManagedCall(Method method)
18021807
{
18031808
WriteLine($"return {marshal.Context.ArgumentPrefix}{marshal.Context.Return};");
18041809
}
1810+
1811+
if (ctx.HasCodeBlock)
1812+
UnindentAndWriteCloseBrace();
18051813
}
18061814

18071815
if (!isVoid && isSetter)

src/Generator/Options.cs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@ public DriverOptions()
2020
GeneratorKind = GeneratorKind.CSharp;
2121
OutputInteropIncludes = true;
2222

23-
Encoding = Encoding.ASCII;
24-
2523
StripLibPrefix = true;
2624

2725
ExplicitlyPatchedVirtualFunctions = new HashSet<string>();
@@ -125,7 +123,7 @@ public bool DoAllModulesHaveLibraries() =>
125123
/// </summary>
126124
public CommentKind? CommentKind;
127125

128-
public Encoding Encoding { get; set; }
126+
public Encoding Encoding { get; set; } = Encoding.UTF8;
129127

130128
public bool IsCSharpGenerator => GeneratorKind == GeneratorKind.CSharp;
131129

src/Generator/Passes/DelegatesPass.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,7 @@ private static StringBuilder FormatTypesStringForIdentifier(StringBuilder types)
276276
{
277277
// TODO: all of this needs proper general fixing by only leaving type names
278278
return types.Replace("global::System.", string.Empty)
279+
.Replace("[MarshalAs(UnmanagedType.LPUTF8Str)] ", string.Empty)
279280
.Replace("[MarshalAs(UnmanagedType.LPWStr)] ", string.Empty)
280281
.Replace("global::", string.Empty).Replace("*", "Ptr")
281282
.Replace('.', '_').Replace(' ', '_').Replace("::", "_")

src/Generator/Types/Std/Stdlib.cs

Lines changed: 82 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ public override void CLIMarshalToManaged(MarshalContext ctx)
129129
encoding = Context.Options.Encoding;
130130

131131
string param;
132-
if (Equals(encoding, Encoding.ASCII))
132+
if (Equals(encoding, Encoding.ASCII) || Equals(encoding, Encoding.UTF8))
133133
param = "E_UTF8";
134134
else if (Equals(encoding, Encoding.Unicode) ||
135135
Equals(encoding, Encoding.BigEndianUnicode))
@@ -154,8 +154,9 @@ public override Type CSharpSignatureType(TypePrinterContext ctx)
154154
return new CustomType(typePrinter.IntPtrType);
155155
}
156156

157-
if (Context.Options.Encoding == Encoding.ASCII)
158-
return new CustomType("string");
157+
if (Context.Options.Encoding == Encoding.ASCII ||
158+
Context.Options.Encoding == Encoding.UTF8)
159+
return new CustomType("[MarshalAs(UnmanagedType.LPUTF8Str)] string");
159160

160161
if (Context.Options.Encoding == Encoding.Unicode ||
161162
Context.Options.Encoding == Encoding.BigEndianUnicode)
@@ -183,19 +184,14 @@ public override void CSharpMarshalToNative(CSharpMarshalContext ctx)
183184
if (substitution != null)
184185
param = $"({substitution.Replacement}) (object) {param}";
185186

186-
if (Equals(Context.Options.Encoding, Encoding.ASCII))
187-
{
188-
ctx.Return.Write($"Marshal.StringToHGlobalAnsi({param})");
189-
return;
190-
}
191-
if (Equals(Context.Options.Encoding, Encoding.Unicode) ||
192-
Equals(Context.Options.Encoding, Encoding.BigEndianUnicode))
193-
{
194-
ctx.Return.Write($"Marshal.StringToHGlobalUni({param})");
195-
return;
196-
}
197-
throw new System.NotSupportedException(
198-
$"{Context.Options.Encoding.EncodingName} is not supported yet.");
187+
string bytes = $"__bytes{ctx.ParameterIndex}";
188+
string bytePtr = $"__bytePtr{ctx.ParameterIndex}";
189+
ctx.Before.WriteLine($@"byte[] {bytes} = global::System.Text.Encoding.{
190+
GetEncodingClass(ctx.Parameter)}.GetBytes({param});");
191+
ctx.Before.WriteLine($"fixed (byte* {bytePtr} = {bytes})");
192+
ctx.HasCodeBlock = true;
193+
ctx.Before.WriteOpenBraceAndIndent();
194+
ctx.Return.Write($"new global::System.IntPtr({bytePtr})");
199195
}
200196

201197
public override void CSharpMarshalToManaged(CSharpMarshalContext ctx)
@@ -207,49 +203,93 @@ public override void CSharpMarshalToManaged(CSharpMarshalContext ctx)
207203
return;
208204
}
209205

210-
Type type = Type.Desugar();
211-
Type pointee = type.GetPointee().Desugar();
212-
var isChar = type.IsPointerToPrimitiveType(PrimitiveType.Char) ||
213-
(pointee.IsPointerToPrimitiveType(PrimitiveType.Char) &&
214-
ctx.Parameter != null &&
215-
(ctx.Parameter.IsInOut || ctx.Parameter.IsOut));
216-
var encoding = isChar ? Encoding.ASCII : Encoding.Unicode;
217-
218-
if (Equals(encoding, Encoding.ASCII))
219-
encoding = Context.Options.Encoding;
220-
221206
string returnVarName = ctx.ReturnVarName;
207+
string nullPtr = "global::System.IntPtr.Zero";
222208
if (ctx.Function != null)
223209
{
224210
Type returnType = ctx.Function.ReturnType.Type.Desugar();
225211
if (returnType.IsAddress() &&
226212
returnType.GetPointee().Desugar().IsAddress())
227213
{
228-
returnVarName = $"new global::System.IntPtr(*{returnVarName})";
214+
returnVarName = $"*{returnVarName}";
215+
nullPtr = "null";
229216
}
230217
}
231218

232-
if (Equals(encoding, Encoding.ASCII))
219+
TextGenerator textGenerator;
220+
if (ctx.Parameter == null)
233221
{
234-
ctx.Return.Write($"Marshal.PtrToStringAnsi({returnVarName})");
235-
return;
222+
textGenerator = ctx.Before;
223+
textGenerator.WriteLine($"if ({ctx.ReturnVarName} == {nullPtr})");
224+
textGenerator.WriteLineIndent($"return default({ctx.ReturnType});");
236225
}
237-
if (Equals(encoding, Encoding.UTF8))
226+
else
238227
{
239-
ctx.Return.Write($"Marshal.PtrToStringUTF8({returnVarName})");
240-
return;
228+
textGenerator = ctx.Cleanup;
229+
textGenerator.WriteLine($"if ({ctx.ReturnVarName} == {nullPtr})");
230+
textGenerator.WriteOpenBraceAndIndent();
231+
textGenerator.WriteLine($"{ctx.Parameter.Name} = default({Type.Desugar()});");
232+
textGenerator.WriteLine("return;");
233+
textGenerator.UnindentAndWriteCloseBrace();
241234
}
242235

243-
// If we reach this, we know the string is Unicode.
244-
if (isChar || ctx.Context.TargetInfo.WCharWidth == 16)
236+
string encoding = GetEncodingClass(ctx.Parameter);
237+
string type = GetTypeForCodePoint(encoding);
238+
textGenerator.WriteLine($"var __retPtr = ({type}*) {returnVarName};");
239+
textGenerator.WriteLine("int __length = 0;");
240+
textGenerator.WriteLine($"while (*(__retPtr++) != 0) __length += sizeof({type});");
241+
242+
ctx.Return.Write($@"global::System.Text.Encoding.{
243+
encoding}.GetString((byte*) {returnVarName}, __length)");
244+
}
245+
246+
private string GetEncodingClass(Parameter parameter)
247+
{
248+
Type type = Type.Desugar();
249+
Type pointee = type.GetPointee().Desugar();
250+
var isChar = type.IsPointerToPrimitiveType(PrimitiveType.Char) ||
251+
(pointee.IsPointerToPrimitiveType(PrimitiveType.Char) &&
252+
parameter != null &&
253+
(parameter.IsInOut || parameter.IsOut));
254+
255+
if (!isChar)
256+
return (Context.TargetInfo.WCharWidth == 16) ?
257+
nameof(Encoding.Unicode) : nameof(Encoding.UTF32);
258+
259+
if (Context.Options.Encoding == Encoding.ASCII)
260+
return nameof(Encoding.ASCII);
261+
262+
if (Context.Options.Encoding == Encoding.BigEndianUnicode)
263+
return nameof(Encoding.BigEndianUnicode);
264+
265+
if (Context.Options.Encoding == Encoding.Unicode)
266+
return nameof(Encoding.Unicode);
267+
268+
if (Context.Options.Encoding == Encoding.UTF32)
269+
return nameof(Encoding.UTF32);
270+
271+
if (Context.Options.Encoding == Encoding.UTF7)
272+
return nameof(Encoding.UTF7);
273+
274+
if (Context.Options.Encoding == Encoding.UTF8)
275+
return nameof(Encoding.UTF8);
276+
277+
throw new System.NotSupportedException(
278+
$"{Context.Options.Encoding.EncodingName} is not supported yet.");
279+
}
280+
281+
private static string GetTypeForCodePoint(string encoding)
282+
{
283+
switch (encoding)
245284
{
246-
ctx.Return.Write($"Marshal.PtrToStringUni({returnVarName})");
247-
return;
285+
case nameof(Encoding.UTF32):
286+
return "int";
287+
case nameof(Encoding.Unicode):
288+
case nameof(Encoding.BigEndianUnicode):
289+
return "short";
290+
default:
291+
return "byte";
248292
}
249-
// If we reach this, we should have an UTF-32 wide string.
250-
const string encodingName = "System.Text.Encoding.UTF32";
251-
ctx.Return.Write($@"CppSharp.Runtime.Helpers.MarshalEncodedString({
252-
returnVarName}, {encodingName})");
253293
}
254294
}
255295

src/Runtime/Helpers.cs

Lines changed: 0 additions & 24 deletions
This file was deleted.

tests/Common/Common.Tests.cs

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -829,7 +829,7 @@ This is a very long string. This is a very long string. This is a very long stri
829829
}
830830
}
831831

832-
[Test]
832+
[Test]
833833
public void TestStdStringPassedByValue()
834834
{
835835
// when C++ memory is deleted, it's only marked as free but not immediately freed
@@ -859,6 +859,39 @@ public void TestNullStdString()
859859
}
860860
}
861861

862+
[Test]
863+
public void TestUTF8()
864+
{
865+
var strings = new[] { "ЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя" +
866+
"ѐёђѓєѕіїјљњћќѝўџѠѡѢѣѤѥѦѧѨѩѪѫѬѭѮѯѰѱѲѳѴѵѶѷѸѹѺѻѼѽѾѿҀҁҊҋҌҍҎҏҐґҒғҔҕҖҗҘҙҚқҜҝҞҟҠҡҢңҤҥҦҧҨҩ" +
867+
"ҪҫҬҭҮүҰұҲҳҴҵҶҷҸҹҺһҼҽҾҿӀӁӂӃӄӅӆӇӈӉӊӋӌӍӎӏӐӑӒӓӔӕӖӗӘәӚӛӜӝӞӟӠӡӢӣӤӥӦӧӨөӪӫӬӭӮӯӰӱӲӳӴӵӶӷӸӹӺӻӼӽ" +
868+
"ӾӿԀԁԂԃԄԅԆԇԈԉԊԋԌԍԎԏԐԑԒԓ",
869+
870+
"აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶჷჸჹჺ",
871+
872+
"ԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿՀՁՂՃՄՅՆՇՈՉՊՋՌՍՎՏՐՑՒՓՔՕՖՙաբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտրցւփքօֆև",
873+
874+
"々〆〱〲〳〴〵〻〼ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづて" +
875+
"でとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕ" +
876+
"ゖゝゞゟァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニ" +
877+
"ヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺ" +
878+
"ーヽヾヿㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿ",
879+
880+
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzªµºÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ" +
881+
"ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿĀāĂ㥹ĆćĈĉĊċČčĎďĐđĒēĔĕĖėĘęĚěĜĝĞğĠġĢģĤĥĦħĨĩĪīĬĭĮįİıIJijĴĵ" +
882+
"ĶķĸĹĺĻļĽľĿŀŁłŃńŅņŇňʼnŊŋŌōŎŏŐőŒœŔŕŖŗŘřŚśŜŝŞşŠšŢţŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵŶŷŸŹźŻżŽžſƀƁƂƃƄƅƆƇƈƉƊ" +
883+
"ƋƌƍƎƏƐƑƒƓƔƕƖƗƘƙƚƛƜƝƞƟƠơƢƣƤƥƦƧƨƩƪƫƬƭƮƯưƱƲƳƴƵƶƷƸƹƺƻƼƽƾƿǀǁǂǃDŽDždžLJLjljNJNjnjǍǎǏǐǑǒǓǔǕǖǗǘǙǚǛǜǝ" +
884+
"ǞǟǠǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯǰDZDzdzǴǵǶǷǸǹǺǻǼǽǾǿȀȁȂȃȄȅȆȇȈȉȊȋȌȍȎȏȐȑȒȓȔȕȖȗȘșȚțȜȝȞȟȠȡȢȣȤȥȦȧȨȩȪȫȬȭȮȯȰȱȲȳ" +
885+
"ȴȵȶȷȸȹȺȻȼȽȾȿɀɁɂɃɄɅɆɇɈɉɊɋɌɍɎɏḀḁḂḃḄḅḆḇḈḉḊḋḌḍḎḏḐḑḒḓḔḕḖḗḘḙḚḛḜḝḞḟḠḡḢḣḤḥḦḧḨḩḪḫḬḭḮḯḰḱḲḳḴḵḶḷḸḹḺḻḼḽ" +
886+
"ḾḿṀṁṂṃṄṅṆṇṈṉṊṋṌṍṎṏṐṑṒṓṔṕṖṗṘṙṚṛṜṝṞṟṠṡṢṣṤṥṦṧṨṩṪṫṬṭṮṯṰṱṲṳṴṵṶṷṸṹṺṻṼṽṾṿẀẁẂẃẄẅẆẇẈẉẊẋẌẍẎẏẐẑẒẓẔẕẖẗẘẙẚ" +
887+
"ẛẞẠạẢảẤấẦầẨẩẪẫẬậẮắẰằẲẳẴẵẶặẸẹẺẻẼẽẾếỀềỂểỄễỆệỈỉỊịỌọỎỏỐốỒồỔổỖỗỘộỚớỜờỞởỠỡỢợỤụỦủỨứỪừỬửỮữỰựỲỳỴỵỶỷỸỹ" +
888+
"ⱠⱡⱢⱣⱤⱥⱦⱧⱨⱩⱪⱫⱬⱭⱱⱲⱳⱴⱵⱶⱷ" };
889+
foreach (var @string in strings)
890+
{
891+
Assert.That(Common.TakeReturnUTF8(@string), Is.EqualTo(@string));
892+
}
893+
}
894+
862895
private class CustomDerivedFromVirtual : AbstractWithVirtualDtor
863896
{
864897
public override void Abstract()

tests/Common/Common.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1071,6 +1071,12 @@ void overloadPointer(const void* p, int i)
10711071
{
10721072
}
10731073

1074+
const char* takeReturnUTF8(const char* utf8)
1075+
{
1076+
UTF8 = utf8;
1077+
return UTF8.data();
1078+
}
1079+
10741080
StructWithCopyCtor::StructWithCopyCtor() {}
10751081
StructWithCopyCtor::StructWithCopyCtor(const StructWithCopyCtor& other) : mBits(other.mBits) {}
10761082

tests/Common/Common.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1495,6 +1495,8 @@ DLL_API void takeReferenceToVoidStar(const void*& p);
14951495
DLL_API void takeVoidStarStar(void** p);
14961496
DLL_API void overloadPointer(void* p, int i = 0);
14971497
DLL_API void overloadPointer(const void* p, int i = 0);
1498+
DLL_API const char* takeReturnUTF8(const char* utf8);
1499+
DLL_API std::string UTF8;
14981500

14991501
struct DLL_API StructWithCopyCtor
15001502
{

0 commit comments

Comments
 (0)