72
72
73
73
import java .math .BigInteger ;
74
74
import java .nio .charset .Charset ;
75
+ import java .nio .charset .CodingErrorAction ;
75
76
import java .util .List ;
76
77
77
78
import com .oracle .graal .python .PythonFileDetector ;
159
160
import com .oracle .graal .python .runtime .PythonParser .ParserMode ;
160
161
import com .oracle .graal .python .runtime .exception .PException ;
161
162
import com .oracle .graal .python .runtime .exception .PythonErrorType ;
163
+ import com .oracle .graal .python .util .CharsetMapping ;
162
164
import com .oracle .graal .python .util .OverflowException ;
163
165
import com .oracle .graal .python .util .PythonUtils ;
164
166
import com .oracle .graal .python .util .Supplier ;
@@ -800,8 +802,9 @@ PCode compile(String expression, String filename, String mode, Object kwFlags, O
800
802
@ Specialization (limit = "3" )
801
803
PCode generic (VirtualFrame frame , Object wSource , Object wFilename , Object wMode , Object kwFlags , Object kwDontInherit , Object kwOptimize ,
802
804
@ Cached CastToJavaStringNode castStr ,
805
+ @ Cached CodecsModuleBuiltins .HandleDecodingErrorNode handleDecodingErrorNode ,
803
806
@ CachedLibrary ("wSource" ) InteropLibrary interopLib ,
804
- @ CachedLibrary (limit = "3 " ) PythonObjectLibrary lib ) {
807
+ @ CachedLibrary (limit = "4 " ) PythonObjectLibrary lib ) {
805
808
if (wSource instanceof PCode ) {
806
809
return (PCode ) wSource ;
807
810
}
@@ -812,12 +815,12 @@ PCode generic(VirtualFrame frame, Object wSource, Object wFilename, Object wMode
812
815
} catch (CannotCastException e ) {
813
816
throw raise (TypeError , ErrorMessages .ARG_S_MUST_BE_S_NOT_P , "compile()" , "mode" , "str" , wMode );
814
817
}
815
- String source = sourceAsString (wSource , filename , interopLib , lib );
818
+ String source = sourceAsString (wSource , filename , interopLib , lib , handleDecodingErrorNode );
816
819
return compile (source , filename , mode , kwFlags , kwDontInherit , kwOptimize );
817
820
}
818
821
819
822
// modeled after _Py_SourceAsString
820
- String sourceAsString (Object source , String filename , InteropLibrary interopLib , PythonObjectLibrary pyLib ) {
823
+ String sourceAsString (Object source , String filename , InteropLibrary interopLib , PythonObjectLibrary pyLib , CodecsModuleBuiltins . HandleDecodingErrorNode handleDecodingErrorNode ) {
821
824
if (interopLib .isString (source )) {
822
825
try {
823
826
return interopLib .asString (source );
@@ -835,27 +838,31 @@ String sourceAsString(Object source, String filename, InteropLibrary interopLib,
835
838
throw CompilerDirectives .shouldNotReachHere (e );
836
839
}
837
840
Charset charset = PythonFileDetector .findEncodingStrict (bytes );
838
- return createString (bytes , charset );
841
+ String pythonEncodingNameFromJavaName = CharsetMapping .getPythonEncodingNameFromJavaName (charset .name ());
842
+ CodecsModuleBuiltins .TruffleDecoder decoder = new CodecsModuleBuiltins .TruffleDecoder (pythonEncodingNameFromJavaName , charset , bytes , CodingErrorAction .REPORT );
843
+ if (!decoder .decodingStep (true )) {
844
+ try {
845
+ handleDecodingErrorNode .execute (decoder , "strict" , source );
846
+ throw CompilerDirectives .shouldNotReachHere ();
847
+ } catch (PException e ) {
848
+ throw raiseInvalidSyntax (filename , "(unicode error) %s" , pyLib .asPString (e .getEscapedException ()));
849
+ }
850
+ }
851
+ return decoder .getString ();
839
852
} catch (PythonFileDetector .InvalidEncodingException e ) {
840
- throw handleInvalidEncoding (filename , e );
853
+ throw raiseInvalidSyntax (filename , "encoding problem: %s" , e . getEncodingName () );
841
854
}
842
855
} else {
843
856
throw raise (TypeError , ErrorMessages .ARG_D_MUST_BE_S , "compile()" , 1 , "string, bytes or AST object" );
844
857
}
845
858
}
846
859
847
860
@ TruffleBoundary
848
- private RuntimeException handleInvalidEncoding (String filename , PythonFileDetector . InvalidEncodingException e ) {
861
+ private RuntimeException raiseInvalidSyntax (String filename , String format , Object ... args ) {
849
862
PythonContext context = getContext ();
850
863
// Create non-empty source to avoid overwriting the message with "unexpected EOF"
851
864
Source source = PythonLanguage .newSource (context , " " , filename , mayBeFromFile );
852
- throw getCore ().raiseInvalidSyntax (source , source .createUnavailableSection (), "encoding problem: %s" , e .getEncodingName ());
853
- }
854
-
855
- @ TruffleBoundary
856
- private static String createString (byte [] bytes , Charset charset ) {
857
- return new String (bytes , charset );
858
-
865
+ throw getCore ().raiseInvalidSyntax (source , source .createUnavailableSection (), format , args );
859
866
}
860
867
861
868
public static CompileNode create (boolean mapFilenameToUri ) {
0 commit comments