Skip to content

Commit 9fe52a0

Browse files
committed
Merge pull request #104 from imraanparker/master
Add the ability to change the Tika Server's log file path
2 parents 51e287d + d0f8fa8 commit 9fe52a0

File tree

1 file changed

+19
-19
lines changed

1 file changed

+19
-19
lines changed

tika/tika.py

Lines changed: 19 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,6 @@
7878
from urllib.parse import urlparse as urlparse
7979

8080
if sys.version_info[0] < 3:
81-
import codecs
8281
open = codecs.open
8382

8483
import requests
@@ -94,6 +93,7 @@
9493
TikaVersion = os.getenv('TIKA_VERSION', '1.12')
9594
TikaJarPath = tempfile.gettempdir()
9695
TikaFilesPath = tempfile.gettempdir()
96+
TikaServerLogFilePath = tempfile.gettempdir()
9797
TikaServerJar = os.getenv(
9898
'TIKA_SERVER_JAR',
9999
"http://search.maven.org/remotecontent?filepath=org/apache/tika/tika-server/"+TikaVersion+"/tika-server-"+TikaVersion+".jar")
@@ -116,7 +116,7 @@ def die(*s): warn('Error:', *s); echo2(USAGE); sys.exit()
116116

117117
def runCommand(cmd, option, urlOrPaths, port, outDir=None, serverHost=ServerHost, tikaServerJar=TikaServerJar, verbose=Verbose, encode=EncodeUtf8):
118118
"""Run the Tika command by calling the Tika server and return results in JSON format (or plain text)."""
119-
# import pdb; pdb.set_trace()
119+
# import pdb; pdb.set_trace()
120120
if (cmd in 'parse' or cmd in 'detect') and (urlOrPaths == [] or urlOrPaths == None):
121121
die('No URLs/paths specified.')
122122
serverEndpoint = 'http://' + serverHost + ':' + port
@@ -142,12 +142,12 @@ def getPaths(urlOrPaths):
142142
"""
143143
paths = []
144144
for eachUrlOrPaths in urlOrPaths:
145-
if os.path.isdir(eachUrlOrPaths):
146-
for root, directories, filenames in walk(eachUrlOrPaths):
147-
for filename in filenames:
148-
paths.append(os.path.join(root,filename))
149-
else:
150-
paths.append(eachUrlOrPaths)
145+
if os.path.isdir(eachUrlOrPaths):
146+
for root, directories, filenames in walk(eachUrlOrPaths):
147+
for filename in filenames:
148+
paths.append(os.path.join(root,filename))
149+
else:
150+
paths.append(eachUrlOrPaths)
151151
return paths
152152

153153
def parseAndSave(option, urlOrPaths, outDir=None, serverEndpoint=ServerEndpoint, verbose=Verbose, tikaServerJar=TikaServerJar,
@@ -158,15 +158,15 @@ def parseAndSave(option, urlOrPaths, outDir=None, serverEndpoint=ServerEndpoint,
158158
metaPaths = []
159159
paths = getPaths(urlOrPaths)
160160
for path in paths:
161-
if outDir is None:
162-
metaPath = path + metaExtension
163-
else:
164-
metaPath = os.path.join(outDir, os.path.split(path)[1] + metaExtension)
165-
echo2('Writing %s' % metaPath)
166-
with open(metaPath, 'w', 'utf-8') as f:
167-
f.write(parse1(option, path, serverEndpoint, verbose, tikaServerJar, \
161+
if outDir is None:
162+
metaPath = path + metaExtension
163+
else:
164+
metaPath = os.path.join(outDir, os.path.split(path)[1] + metaExtension)
165+
echo2('Writing %s' % metaPath)
166+
with open(metaPath, 'w', 'utf-8') as f:
167+
f.write(parse1(option, path, serverEndpoint, verbose, tikaServerJar, \
168168
responseMimeType, services)[1] + u"\n")
169-
metaPaths.append(metaPath)
169+
metaPaths.append(metaPath)
170170
return metaPaths
171171

172172

@@ -181,7 +181,7 @@ def parse1(option, urlOrPath, serverEndpoint=ServerEndpoint, verbose=Verbose, ti
181181
responseMimeType='application/json',
182182
services={'meta': '/meta', 'text': '/tika', 'all': '/rmeta/text'}):
183183
"""Parse the object and return extracted metadata and/or text in JSON format."""
184-
path, type = getRemoteFile(urlOrPath, TikaFilesPath)
184+
path, file_type = getRemoteFile(urlOrPath, TikaFilesPath)
185185
if option not in services:
186186
warn('config option must be one of meta, text, or all; using all.')
187187
service = services.get(option, services['all'])
@@ -190,7 +190,7 @@ def parse1(option, urlOrPath, serverEndpoint=ServerEndpoint, verbose=Verbose, ti
190190
{'Accept': responseMimeType, 'Content-Disposition': 'attachment; filename=%s' % os.path.basename(path)},
191191
verbose, tikaServerJar)
192192

193-
if type == 'remote': os.unlink(path)
193+
if file_type == 'remote': os.unlink(path)
194194
return (status, response)
195195

196196
def detectLang(option, urlOrPaths, serverEndpoint=ServerEndpoint, verbose=Verbose, tikaServerJar=TikaServerJar,
@@ -349,7 +349,7 @@ def startServer(tikaServerJar, serverHost = ServerHost, port = Port):
349349
host = "0.0.0.0"
350350

351351
cmd = 'java -jar %s --port %i --host %s &' % (tikaServerJar, port, host)
352-
logFile = open(os.path.join(TikaJarPath, 'tika-server.log'), 'w')
352+
logFile = open(os.path.join(TikaServerLogFilePath, 'tika-server.log'), 'w')
353353
cmd = Popen(cmd , stdout= logFile, stderr = STDOUT, shell =True)
354354
time.sleep(5)
355355

0 commit comments

Comments
 (0)