|
3 | 3 | """pointofview - A Python package for determining a piece of text's point of view (first, second, third, or unknown).""" |
4 | 4 |
|
5 | 5 | import re |
| 6 | +from collections import OrderedDict |
6 | 7 |
|
7 | 8 | import pkg_resources |
8 | 9 |
|
9 | 10 | __version__ = pkg_resources.resource_string( |
10 | 11 | 'pointofview', 'VERSION').decode('utf-8').strip() |
11 | 12 |
|
# NOTE:
# Words are expected to be in lower case.
#
# Points of view are listed in order of precedence.
# First person PoV can also contain second and third person words.
# Second person PoV can also contain third person words.
# Third person PoV can only contain third person words.
POV_WORDS = OrderedDict([
    ('first',
     ["i", "i'm", "i'll", "i'd", "i've", "me", "mine", "myself", "we",
      "we're", "we'll", "we'd", "we've", "us", "ours", "ourselves"]),
    ('second',
     ["you", "you're", "you'll", "you'd", "you've",
      "your", "yours", "yourself", "yourselves"]),
    ('third',
     ["he", "he's", "he'll", "he'd", "him", "his", "himself", "she", "she's",
      "she'll", "she'd", "her", "hers", "herself", "it", "it's", "it'll",
      "it'd", "itself", "they", "they're", "they'll", "they'd", "they've",
      "them", "their", "theirs", "themselves"])
])

# Splits text on every run of characters that is neither a word character
# nor an apostrophe (straight or typographic), so contractions stay intact.
RE_WORDS = re.compile(r"[^\w’']+")


def get_word_pov(word, pov_words=POV_WORDS):
    """Return the point of view that a single word belongs to.

    The word is lower-cased and typographic apostrophes are replaced with
    straight ones before it is compared, case-insensitively, against the
    entries of ``pov_words``.

    Args:
        word: The word to classify.
        pov_words: Mapping of point-of-view name to a list of words.
            Defaults to ``POV_WORDS``.

    Returns:
        The first key of ``pov_words`` (in iteration order) whose word list
        contains the word, or ``None`` if no list contains it.
    """
    # Normalise once instead of once per point of view.
    normalized = word.lower().replace("’", "'")
    for pov, candidates in pov_words.items():
        if any(normalized == candidate.lower() for candidate in candidates):
            return pov
    return None


def parse_pov_words(text, pov_words=POV_WORDS):
    """Collect the point-of-view words found in a piece of text.

    The text is stripped, lower-cased and split with ``RE_WORDS``; every
    resulting token that ``get_word_pov`` recognises is recorded under its
    point of view, in order of appearance (duplicates are kept).

    Args:
        text: The text to scan.
        pov_words: Mapping of point-of-view name to a list of words.
            Defaults to ``POV_WORDS``.

    Returns:
        A dict with one key per key of ``pov_words``; each value is the
        list of matching tokens as they appear in the lower-cased text
        (possibly empty).
    """
    text_pov_words = {pov: [] for pov in pov_words}
    for word in RE_WORDS.split(text.strip().lower()):
        word_pov = get_word_pov(word, pov_words)
        if word_pov is not None:
            text_pov_words[word_pov].append(word)
    return text_pov_words


def get_text_pov(text, pov_words=POV_WORDS):
    """Determine the point of view of a piece of text.

    Args:
        text: The text to classify.
        pov_words: Mapping of point-of-view name to a list of words, in
            order of precedence. Defaults to ``POV_WORDS``.

    Returns:
        The first key of ``pov_words`` (in iteration order) for which the
        text contains at least one matching word, or ``None`` if the text
        contains none of the words.
    """
    text_pov_words = parse_pov_words(text, pov_words)
    # Walk the mapping that was actually passed in, so custom keys and a
    # custom precedence order are honoured.
    for pov in pov_words:
        if text_pov_words[pov]:
            return pov
    return None
0 commit comments