Skip to content

Commit 34f1775

Browse files
committed
Fix: correctly encode non-ascii ext module names
1 parent bda0b75 commit 34f1775

File tree

2 files changed

+62
-33
lines changed

2 files changed

+62
-33
lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/ImpModuleBuiltins.java

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@
4040
*/
4141
package com.oracle.graal.python.builtins.modules;
4242

43-
import static com.oracle.graal.python.nodes.SpecialAttributeNames.__FILE__;
4443
import static com.oracle.graal.python.runtime.exception.PythonErrorType.NotImplementedError;
4544

4645
import java.io.IOException;
@@ -66,15 +65,12 @@
6665
import com.oracle.graal.python.builtins.objects.cext.hpy.HPyExternalFunctionNodes.HPyCheckFunctionResultNode;
6766
import com.oracle.graal.python.builtins.objects.cext.hpy.HPyExternalFunctionNodesFactory.HPyCheckHandleResultNodeGen;
6867
import com.oracle.graal.python.builtins.objects.code.PCode;
69-
import com.oracle.graal.python.builtins.objects.dict.PDict;
7068
import com.oracle.graal.python.builtins.objects.ints.IntBuiltins;
7169
import com.oracle.graal.python.builtins.objects.ints.PInt;
7270
import com.oracle.graal.python.builtins.objects.module.PythonModule;
7371
import com.oracle.graal.python.builtins.objects.object.PythonObject;
7472
import com.oracle.graal.python.builtins.objects.object.PythonObjectLibrary;
7573
import com.oracle.graal.python.builtins.objects.str.PString;
76-
import com.oracle.graal.python.nodes.ErrorMessages;
77-
import com.oracle.graal.python.nodes.PRaiseNode;
7874
import com.oracle.graal.python.nodes.attributes.ReadAttributeFromDynamicObjectNode;
7975
import com.oracle.graal.python.nodes.attributes.SetAttributeNode;
8076
import com.oracle.graal.python.nodes.function.PythonBuiltinBaseNode;
@@ -86,7 +82,6 @@
8682
import com.oracle.graal.python.runtime.GilNode;
8783
import com.oracle.graal.python.runtime.PythonContext;
8884
import com.oracle.graal.python.runtime.PythonOptions;
89-
import com.oracle.graal.python.runtime.exception.PythonErrorType;
9085
import com.oracle.truffle.api.CompilerDirectives;
9186
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
9287
import com.oracle.truffle.api.dsl.Cached;
@@ -230,24 +225,11 @@ Object run(VirtualFrame frame, PythonObject moduleSpec, @SuppressWarnings("unuse
230225

231226
@TruffleBoundary
232227
private Object run(PythonContext context, ModuleSpec spec) throws IOException, ApiInitException, ImportException {
233-
234228
Object existingModule = findExtensionObject(spec);
235229
if (existingModule != null) {
236230
return existingModule;
237231
}
238-
239-
Object result = CExtContext.loadCExtModule(this, context, spec, getCheckResultNode(), getCheckHPyResultNode());
240-
if (!(result instanceof PythonModule)) {
241-
// PyModuleDef_Init(pyModuleDef)
242-
// TODO: PyModule_FromDefAndSpec((PyModuleDef*)m, spec);
243-
throw PRaiseNode.raiseUncached(this, PythonErrorType.NotImplementedError, ErrorMessages.MULTI_PHASE_INIT_OF_EXTENSION_MODULE_S, spec.name);
244-
} else {
245-
((PythonModule) result).setAttribute(__FILE__, spec.path);
246-
// TODO: _PyImport_FixupExtensionObject(result, name, path, sys.modules)
247-
PDict sysModules = context.getSysModules();
248-
sysModules.setItem(spec.name, result);
249-
return result;
250-
}
232+
return CExtContext.loadCExtModule(this, context, spec, getCheckResultNode(), getCheckHPyResultNode());
251233
}
252234

253235
@SuppressWarnings({"static-method", "unused"})

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/common/CExtContext.java

Lines changed: 61 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,11 @@
4343
import static com.oracle.graal.python.builtins.PythonBuiltinClassType.SystemError;
4444

4545
import java.io.IOException;
46+
import java.nio.charset.CharsetEncoder;
47+
import java.nio.charset.StandardCharsets;
4648

49+
import com.ibm.icu.impl.Punycode;
50+
import com.ibm.icu.text.StringPrepParseException;
4751
import com.oracle.graal.python.PythonLanguage;
4852
import com.oracle.graal.python.builtins.objects.cext.capi.CApiContext;
4953
import com.oracle.graal.python.builtins.objects.cext.common.CExtCommonNodes.CheckFunctionResultNode;
@@ -61,6 +65,7 @@
6165
import com.oracle.graal.python.runtime.object.PythonObjectFactory;
6266
import com.oracle.graal.python.util.PythonUtils;
6367
import com.oracle.truffle.api.CallTarget;
68+
import com.oracle.truffle.api.CompilerDirectives;
6469
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
6570
import com.oracle.truffle.api.CompilerDirectives.ValueType;
6671
import com.oracle.truffle.api.TruffleLanguage.Env;
@@ -166,24 +171,68 @@ public final DynamicObject getSymbolCache() {
166171
*/
167172
@ValueType
168173
public static final class ModuleSpec {
169-
/**
170-
* The name of the module to load (also just required for creating appropriate error
171-
* messages).
172-
*/
173-
public final String name;
174+
private static CharsetEncoder asciiEncoder;
174175

175-
/**
176-
* The path of the C extension module to load (usually something ending with {@code .so} or
177-
* {@code .dylib} or similar).
178-
*/
176+
public final String name;
179177
public final String path;
180178
public final Object originalModuleSpec;
179+
private String encodedName;
180+
private boolean ascii;
181181

182182
public ModuleSpec(String name, String path, Object originalModuleSpec) {
183183
this.name = name;
184184
this.path = path;
185185
this.originalModuleSpec = originalModuleSpec;
186186
}
187+
188+
private static CharsetEncoder ensureASCIIEncoder() {
189+
if (asciiEncoder == null) {
190+
asciiEncoder = StandardCharsets.US_ASCII.newEncoder();
191+
}
192+
return asciiEncoder;
193+
}
194+
195+
/**
196+
* Get the variable part of a module's export symbol name. Returns a bytes instance. For
197+
* non-ASCII-named modules, the name is encoded as per PEP 489. The hook_prefix pointer is
198+
* set to either ascii_only_prefix or nonascii_prefix, as appropriate.
199+
*/
200+
@TruffleBoundary
201+
public String getEncodedName() {
202+
if (encodedName != null) {
203+
return encodedName;
204+
}
205+
206+
// Get the short name (substring after last dot)
207+
String basename = name.substring(name.lastIndexOf('.') + 1);
208+
209+
if (ensureASCIIEncoder().canEncode(basename)) {
210+
ascii = true;
211+
} else {
212+
ascii = false;
213+
try {
214+
basename = Punycode.encode(basename, null).toString();
215+
} catch (StringPrepParseException e) {
216+
throw CompilerDirectives.shouldNotReachHere();
217+
}
218+
}
219+
220+
// replace '-' by '_'; note: this is fast and does not use regex
221+
return (encodedName = basename.replace('-', '_'));
222+
}
223+
224+
@TruffleBoundary
225+
public String getInitFunctionName(boolean hpy) {
226+
/*
227+
* n.b.: 'getEncodedName' also sets 'ascii' and must therefore be called before 'ascii'
228+
* is queried
229+
*/
230+
String s = getEncodedName();
231+
if (hpy) {
232+
return "HPyInit_" + s;
233+
}
234+
return (ascii ? "PyInit_" : "PyInitU_") + s;
235+
}
187236
}
188237

189238
/**
@@ -216,17 +265,15 @@ public static Object loadCExtModule(Node location, PythonContext context, Module
216265

217266
// Now, try to detect the C extension's API by looking for the appropriate init
218267
// functions.
219-
String basename = spec.name.substring(spec.name.lastIndexOf('.') + 1);
220-
String hpyInitFuncName = "HPyInit_" + basename;
221-
String initFuncName = "PyInit_" + basename;
268+
String hpyInitFuncName = spec.getInitFunctionName(true);
222269
try {
223270
if (llvmInteropLib.isMemberExisting(llvmLibrary, hpyInitFuncName)) {
224271
GraalHPyContext hpyContext = GraalHPyContext.ensureHPyWasLoaded(location, context, spec.name, spec.path);
225272
return hpyContext.initHPyModule(context, llvmLibrary, hpyInitFuncName, spec.name, spec.path, false, llvmInteropLib, checkHPyResultNode);
226273
}
227-
return cApiContext.initCApiModule(location, llvmLibrary, initFuncName, spec, llvmInteropLib, checkFunctionResultNode);
274+
return cApiContext.initCApiModule(location, llvmLibrary, spec.getInitFunctionName(false), spec, llvmInteropLib, checkFunctionResultNode);
228275
} catch (UnsupportedTypeException | ArityException | UnsupportedMessageException e) {
229-
throw new ImportException(CExtContext.wrapJavaException(e, location), spec.name, spec.path, ErrorMessages.CANNOT_INITIALIZE_WITH, spec.path, basename, "");
276+
throw new ImportException(CExtContext.wrapJavaException(e, location), spec.name, spec.path, ErrorMessages.CANNOT_INITIALIZE_WITH, spec.path, spec.getEncodedName(), "");
230277
}
231278
}
232279

0 commit comments

Comments
 (0)