Skip to content

Commit 83d4b8d

Browse files
Merge pull request #275 from Tooa/fix_#273
fix(parser): align `parser.from_file` with doc
2 parents d38c328 + 56e0aab commit 83d4b8d

File tree

2 files changed

+19
-1
lines changed

2 files changed

+19
-1
lines changed

tika/parser.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,15 @@
2020
import os
2121
import json
2222

23-
def from_file(filename, service='all', serverEndpoint=ServerEndpoint, xmlContent=False, headers=None, config_path=None, requestOptions={}):
23+
def from_file(filename, serverEndpoint=ServerEndpoint, service='all', xmlContent=False, headers=None, config_path=None, requestOptions={}):
2424
'''
2525
Parses a file for metadata and content
2626
:param filename: path to the file to be parsed, or a binary file object opened with open(path,'rb')
2727
:param serverEndpoint: Server endpoint url
28+
:param service: service requested from the tika server
29+
Default is 'all', which results in recursive text content+metadata.
30+
'meta' returns only metadata
31+
'text' returns only content
2832
:param xmlContent: Whether or not XML content should be requested.
2933
Default is 'False', which results in text content.
3034
:param headers: Request headers to be sent to the tika REST server, should

tika/tests/test_from_file_service.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,12 @@
1717
#
1818
# python -m unittest tika.tests.test_from_file_service
1919

20+
import sys
2021
import unittest
22+
if sys.version_info >= (3, 3):
23+
from unittest import mock
24+
else:
25+
import mock
2126
import tika.parser
2227

2328

@@ -30,6 +35,15 @@ def test_default_service(self):
3035
'https://boe.es/boe/dias/2019/12/02/pdfs/BOE-A-2019-17288.pdf')
3136
self.assertEqual(result['metadata']['Content-Type'],'application/pdf')
3237
self.assertIn('AUTORIDADES Y PERSONAL',result['content'])
38+
@mock.patch('tika.parser._parse')
39+
@mock.patch('tika.parser.parse1')
40+
def test_remote_endpoint(self, tika_call_mock, _):
41+
result = tika.parser.from_file(
42+
'filename', 'http://tika:9998/tika')
43+
44+
tika_call_mock.assert_called_with(
45+
'all', 'filename', 'http://tika:9998/tika', headers=None, config_path=None,
46+
requestOptions={})
3347
def test_default_service_explicit(self):
3448
'parse file using default service explicitly'
3549
result = tika.parser.from_file(

0 commit comments

Comments
 (0)