|
58 | 58 | detected = detector.from_buffer('some buffered content', config_path='/path/to/configfile') |
59 | 59 |
|
60 | 60 | ''' |
| 61 | +import types |
61 | 62 |
|
62 | 63 | USAGE = """ |
63 | 64 | tika.py [-v] [-e] [-o <outputDir>] [--server <TikaServerEndpoint>] [--install <UrlToTikaServerJar>] [--port <portNumber>] <command> <option> <urlOrPathToFile> |
@@ -140,6 +141,7 @@ def make_content_disposition_header(fn): |
140 | 141 | from subprocess import STDOUT |
141 | 142 | from os import walk |
142 | 143 | import logging |
| 144 | +import io |
143 | 145 |
|
144 | 146 | log_path = os.getenv('TIKA_LOG_PATH', tempfile.gettempdir()) |
145 | 147 | log_file = os.path.join(log_path, 'tika.log') |
@@ -325,9 +327,9 @@ def parse1(option, urlOrPath, serverEndpoint=ServerEndpoint, verbose=Verbose, ti |
325 | 327 | service = services.get(option, services['all']) |
326 | 328 | if service == '/tika': responseMimeType = 'text/plain' |
327 | 329 | headers.update({'Accept': responseMimeType, 'Content-Disposition': make_content_disposition_header(path.encode('utf-8') if type(path) is unicode_string else path)}) |
328 | | - with open(path, 'rb') as f: |
| 330 | + with urlOrPath if _is_file_object(urlOrPath) else open(path, 'rb') as f: |
329 | 331 | status, response = callServer('put', serverEndpoint, service, f, |
330 | | - headers, verbose, tikaServerJar, config_path=config_path, |
| 332 | + headers, verbose, tikaServerJar, config_path=config_path, |
331 | 333 | rawResponse=rawResponse, requestOptions=requestOptions) |
332 | 334 |
|
333 | 335 | if file_type == 'remote': os.unlink(path) |
@@ -690,14 +692,26 @@ def toFilename(url): |
690 | 692 | value = re.sub(r'[^\w\s\.\-]', '-', path).strip().lower() |
691 | 693 | return re.sub(r'[-\s]+', '-', value).strip("-")[-200:] |
692 | 694 |
|
693 | | - |
| 695 | + |
def _is_file_object(f):
    '''
    Tells whether the argument is an open file (or file-like) object rather than a URL or path.
    :param f: value to inspect, e.g. a URL/path string or an already opened file
    :return: True if ``f`` is a file object, False otherwise
    '''
    # types.FileType only exists on Python 2, where the builtin ``file`` type
    # is not an io.IOBase subclass; on Python 3 only io.IOBase is available.
    legacy_file_type = getattr(types, 'FileType', None)
    if legacy_file_type is None:
        file_types = (io.IOBase,)
    else:
        file_types = (legacy_file_type, io.IOBase)

    if isinstance(f, file_types):
        return True

    # Wrappers such as the object returned by tempfile.NamedTemporaryFile
    # delegate to a real file without subclassing io.IOBase, so fall back to
    # duck typing on a callable read(); plain strings and paths have none.
    return callable(getattr(f, 'read', None))
| 703 | + |
694 | 704 | def getRemoteFile(urlOrPath, destPath): |
695 | 705 | ''' |
696 | 706 | Fetches URL to local path or just returns absolute path. |
697 | 707 | :param urlOrPath: resource locator, generally URL or path |
698 | 708 | :param destPath: path to store the resource, usually a path on file system |
699 | | - :return: tuple having (path, 'local'/'remote') |
| 709 | + :return: tuple having (path, 'local'/'remote'/'binary') |
700 | 710 | ''' |
| 711 | + # handle binary stream input |
| 712 | + if _is_file_object(urlOrPath): |
| 713 | + return (urlOrPath.name, 'binary') |
| 714 | + |
701 | 715 | urlp = urlparse(urlOrPath) |
702 | 716 | if urlp.scheme == '': |
703 | 717 | return (os.path.abspath(urlOrPath), 'local') |
|
0 commit comments