1616import org .apache .tika .parser .AutoDetectParser ;
1717import org .apache .tika .parser .Parser ;
1818import org .apache .tika .parser .ParserDecorator ;
19- import org .elasticsearch .SpecialPermission ;
20- import org .elasticsearch .bootstrap .FilePermissionUtils ;
21- import org .elasticsearch .core .PathUtils ;
22- import org .elasticsearch .core .SuppressForbidden ;
23- import org .elasticsearch .jdk .JarHell ;
2419
2520import java .io .ByteArrayInputStream ;
2621import java .io .IOException ;
27- import java .io .UncheckedIOException ;
28- import java .lang .reflect .ReflectPermission ;
29- import java .net .URISyntaxException ;
30- import java .net .URL ;
31- import java .net .URLClassLoader ;
32- import java .nio .file .Files ;
33- import java .nio .file .Path ;
34- import java .security .AccessControlContext ;
35- import java .security .AccessController ;
36- import java .security .PermissionCollection ;
37- import java .security .Permissions ;
38- import java .security .PrivilegedActionException ;
39- import java .security .PrivilegedExceptionAction ;
40- import java .security .ProtectionDomain ;
41- import java .security .SecurityPermission ;
4222import java .util .Arrays ;
4323import java .util .HashSet ;
44- import java .util .LinkedHashSet ;
45- import java .util .PropertyPermission ;
4624import java .util .Set ;
4725
4826/**
@@ -90,24 +68,8 @@ final class TikaImpl {
9068 * parses with tika, throwing any exception hit while parsing the document
9169 */
9270 static String parse (final byte content [], final Metadata metadata , final int limit ) throws TikaException , IOException {
93- // check that its not unprivileged code like a script
94- SpecialPermission .check ();
95-
9671 try {
97- return AccessController .doPrivileged (
98- (PrivilegedExceptionAction <String >) () -> TIKA_INSTANCE .parseToString (new ByteArrayInputStream (content ), metadata , limit ),
99- RESTRICTED_CONTEXT
100- );
101- } catch (PrivilegedActionException e ) {
102- // checked exception from tika: unbox it
103- Throwable cause = e .getCause ();
104- if (cause instanceof TikaException tikaException ) {
105- throw tikaException ;
106- } else if (cause instanceof IOException ioException ) {
107- throw ioException ;
108- } else {
109- throw new AssertionError (cause );
110- }
72+ return TIKA_INSTANCE .parseToString (new ByteArrayInputStream (content ), metadata , limit );
11173 } catch (LinkageError e ) {
11274 if (e .getMessage ().contains ("bouncycastle" )) {
11375 /*
@@ -119,76 +81,4 @@ static String parse(final byte content[], final Metadata metadata, final int lim
11981 throw new RuntimeException (e );
12082 }
12183 }
122-
123- // apply additional containment for parsers, this is intersected with the current permissions
124- // its hairy, but worth it so we don't have some XML flaw reading random crap from the FS
125- private static final AccessControlContext RESTRICTED_CONTEXT = isUsingSecurityManager ()
126- ? new AccessControlContext (new ProtectionDomain [] { new ProtectionDomain (null , getRestrictedPermissions ()) })
127- : null ;
128-
129- private static boolean isUsingSecurityManager () {
130- return false ;
131- }
132-
133- // compute some minimal permissions for parsers. they only get r/w access to the java temp directory,
134- // the ability to load some resources from JARs, and read sysprops
135- @ SuppressForbidden (reason = "adds access to tmp directory" )
136- static PermissionCollection getRestrictedPermissions () {
137- Permissions perms = new Permissions ();
138-
139- // property/env access needed for parsing
140- perms .add (new PropertyPermission ("*" , "read" ));
141- perms .add (new RuntimePermission ("getenv.TIKA_CONFIG" ));
142-
143- try {
144- // add permissions for resource access:
145- // classpath
146- addReadPermissions (perms , JarHell .parseClassPath ());
147- // plugin jars
148- if (TikaImpl .class .getClassLoader () instanceof URLClassLoader urlClassLoader ) {
149- URL [] urls = urlClassLoader .getURLs ();
150- Set <URL > set = new LinkedHashSet <>(Arrays .asList (urls ));
151- if (set .size () != urls .length ) {
152- throw new AssertionError ("duplicate jars: " + Arrays .toString (urls ));
153- }
154- addReadPermissions (perms , set );
155- }
156- // jvm's java.io.tmpdir (needs read/write)
157- FilePermissionUtils .addDirectoryPath (
158- perms ,
159- "java.io.tmpdir" ,
160- PathUtils .get (System .getProperty ("java.io.tmpdir" )),
161- "read,readlink,write,delete" ,
162- false
163- );
164- } catch (IOException e ) {
165- throw new UncheckedIOException (e );
166- }
167- // current hacks needed for POI/PDFbox issues:
168- perms .add (new SecurityPermission ("putProviderProperty.BC" ));
169- perms .add (new SecurityPermission ("insertProvider" ));
170- perms .add (new ReflectPermission ("suppressAccessChecks" ));
171- perms .add (new RuntimePermission ("accessClassInPackage.sun.java2d.cmm.kcms" ));
172- // xmlbeans, use by POI, needs to get the context classloader
173- perms .add (new RuntimePermission ("getClassLoader" ));
174- perms .setReadOnly ();
175- return perms ;
176- }
177-
178- // add resources to (what is typically) a jar, but might not be (e.g. in tests/IDE)
179- @ SuppressForbidden (reason = "adds access to jar resources" )
180- static void addReadPermissions (Permissions perms , Set <URL > resources ) throws IOException {
181- try {
182- for (URL url : resources ) {
183- Path path = PathUtils .get (url .toURI ());
184- if (Files .isDirectory (path )) {
185- FilePermissionUtils .addDirectoryPath (perms , "class.path" , path , "read,readlink" , false );
186- } else {
187- FilePermissionUtils .addSingleFilePath (perms , path , "read,readlink" );
188- }
189- }
190- } catch (URISyntaxException bogus ) {
191- throw new RuntimeException (bogus );
192- }
193- }
19484}
0 commit comments