Add support for server use

grantjenks · grantjenks · commit 588f4d455589 · 2017-09-29T16:04:13.000-07:00
diff --git a/README.rst b/README.rst
@@ -71,6 +71,24 @@ out-file. Input and output default to stdin and stdout respectively. ::
     $ echo thisisatest | python -m wordsegment
     this is a test
 
+If you want to run `WordSegment`_ as a long-running server process, use
+Python's ``-u`` option (or set ``PYTHONUNBUFFERED=1`` in the environment) so
+that each result is written immediately instead of being buffered. ::
+
+    >>> import subprocess as sp
+    >>> wordsegment = sp.Popen(
+    ...     ['python', '-um', 'wordsegment'],
+    ...     stdin=sp.PIPE, stdout=sp.PIPE, stderr=sp.STDOUT)
+    >>> wordsegment.stdin.write('thisisatest\n')
+    >>> wordsegment.stdout.readline()
+    'this is a test\n'
+    >>> wordsegment.stdin.write('workswithotherlanguages\n')
+    >>> wordsegment.stdout.readline()
+    'works with other languages\n'
+    >>> wordsegment.stdin.close()
+    >>> wordsegment.wait()  # Process exit code.
+    0
+
 The maximum segmented word length is 24 characters. Neither the unigram nor
 bigram data contain words exceeding that length. The corpus also excludes
 punctuation and all letters have been lowercased. Before segmenting text,
diff --git a/wordsegment/__init__.py b/wordsegment/__init__.py
@@ -199,9 +199,10 @@ def main(arguments=()):
                         default=sys.stdout)
 
     streams = parser.parse_args(arguments)
+    load()
 
-    for line in streams.infile:
-        streams.outfile.write(' '.join(segment(line)))
+    for line in iter(streams.infile.readline, ''):
+        streams.outfile.write(' '.join(segment(line.strip())))
         streams.outfile.write(os.linesep)
 
 
diff --git a/wordsegment/__main__.py b/wordsegment/__main__.py
@@ -0,0 +1,4 @@
+import sys
+from . import main
+
+main(sys.argv[1:])