-
Notifications
You must be signed in to change notification settings - Fork 63
Expand file tree
/
Copy path tests.py
More file actions
138 lines (107 loc) · 4.88 KB
/
tests.py
File metadata and controls
138 lines (107 loc) · 4.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# -*- coding: utf-8 -*-
from __future__ import print_function, unicode_literals
import sys
import unittest
import redditanalysis as wf
import praw
from collections import defaultdict
class TestSequenceFunctions(unittest.TestCase):
    """Unit tests for the redditanalysis word-frequency functions."""

    def setUp(self):
        # Every test starts from an empty module-level frequency counter.
        wf.popular_words = defaultdict(int)

    def test_parse_cmd_line(self):
        # parse_cmd_line should hand back the user and target straight
        # from the command line.
        self.user, self.target, _options = wf.parse_cmd_line()
        self.assertEqual(self.user, sys.argv[1])
        self.assertEqual(self.target, sys.argv[2])

    def test_parse_text(self):
        # Build a known frequency table, render it as text, and check
        # that parse_text recovers exactly the same counts.
        expected = defaultdict(int)
        expected["testggg"] = 4
        expected["gggtestggg"] = 4
        expected["gytestyg"] = 3
        expected["ygtestgy"] = 5
        txt = "".join((word + " ") * freq for word, freq in expected.items())
        wf.parse_text(txt, count_word_freqs=True, max_threshold=0.34)
        self.assertEqual(expected, wf.popular_words)
        # TODO: still need to test:
        #   anti-spamming w/ max_threshold
        #   count word freqs vs only count one word per sentence

    def test_processRedditor(self):
        """
        Can't think of an easy, repeatable way to test this right now.
        TODO: make our own test redditor
        """

    def test_process_submission(self):
        # open connection to Reddit
        r = praw.Reddit(user_agent="test analyzer by test")
        r.config.decode_html_entities = True
        # Expected top-10 word counts for the fixed thread below.
        popular_words = {"reddit": 48, "upvoted": 32, "upvote": 23,
                         "comments": 13, "3": 12, "fuck": 11, "qgyh2": 9,
                         "upvotes": 9, "fucking": 8, "posts": 7}
        # parse a fixed thread
        # TODO: make our own test thread
        sub = r.get_submission(url=("http://www.reddit.com/r/pics/comments/"
                                    "92dd8/test_post_please_ignore/"))
        wf.process_submission(sub, count_word_freqs=True, max_threshold=0.34)
        # only look at the top 10 most-used words in the thread
        # TODO: look at all words used in thread
        top_words = sorted(wf.popular_words, key=wf.popular_words.get,
                           reverse=True)[:10]
        wfpw = defaultdict(int)
        for word in top_words:
            wfpw[word] = wf.popular_words[word]
        self.assertEqual(popular_words, wfpw)

    def test_processSubreddit(self):
        """
        Can't think of an easy, repeatable way to do this right now.
        TODO: make our own test subreddit
        """

    def test_tokenize(self):
        def tk(text):
            # Materialize the token generator for easy comparison.
            return list(wf.tokenize(text))
        # Whitespace handling
        self.assertEqual(['hello', 'world'], tk('hello world'))
        self.assertEqual(['hello', 'world'], tk('hello world'))
        self.assertEqual(['hello', 'world'], tk('hello\nworld'))
        self.assertEqual(['hello', 'world'], tk('hello\rworld'))
        self.assertEqual(['hello', 'world'], tk('hello\tworld'))
        # URL removal
        self.assertEqual(['a', 'b'], tk('a http://reddit.com/foobar b'))
        self.assertEqual(['a', 'b'], tk('a https://github.com/rhiever b'))
        # Bare-domain removal
        self.assertEqual(['a', 'b'], tk('a reddit.com b'))
        self.assertEqual(['a', 'b'], tk('a reddit.com/ b'))
        self.assertEqual(['a', 'b'], tk('a imgur.com/somefile.jpg b'))
        # Handling of r/ and u/ references
        self.assertEqual(['a', 'r', 'muws', 'b'], tk('a r/muws b'))
        self.assertEqual(['a', 'r', 'muws', 'b'], tk('a /r/muws b'))
        self.assertEqual(['a', 'r', 'muws', 'b'], tk('a /r/muws/ b'))
        self.assertEqual(['a', 'u', 'bboe', 'b'], tk('a u/bboe b'))
        self.assertEqual(['a', 'u', 'bboe', 'b'], tk('a /u/bboe b'))
        self.assertEqual(['a', 'u', 'bboe', 'b'], tk('a /u/bboe/ b'))
        # Possessive removal
        self.assertEqual(['a', 'bboe', 'b'], tk('a bboe\'s b'))
        # Punctuation removal
        self.assertEqual(['hello', 'world'], tk('!hello world'))
        self.assertEqual(['hello', 'world'], tk('hello world!'))
        # Contractions survive tokenization
        self.assertEqual(["i'd", "i'll", "i'm"], tk("I'd I'll I'm"))
        self.assertEqual(["you're", "can't", "i've"], tk("you're can't I've"))
        # Slash/backslash-separated subtokens
        self.assertEqual(['hello', 'world'], tk('hello/world'))
        self.assertEqual(['hello', 'world'], tk(r'hello\world'))
        # Unicode punctuation removal, accented letters preserved
        self.assertEqual(['a', 'helló', 'b'], tk('a ¡helló! b'))
        self.assertEqual(['a', '잠', 'b'], tk('a 잠 b'))
        self.assertEqual(['a', 'b'], tk('a≥b'))
        self.assertEqual(['montréal', 'français'], tk('Montréal français'))
        self.assertEqual(['a', 'background', 'b'], tk('a〘background〙b'))

    def test_with_status(self):
        """
        Is this even a function that should be tested?
        """
# Run the test suite when this file is executed directly.
if __name__ == '__main__':
    unittest.main()