|
1 | 1 | """ |
2 | 2 | Test Frontier |
3 | 3 | """ |
4 | | -from .hstestcase import HSTestCase |
| 4 | +from .conftest import TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT |
5 | 5 |
|
6 | 6 |
|
7 | | -class FrontierTest(HSTestCase): |
| 7 | +def _get_urls(batch): |
| 8 | + return [r[0] for r in batch['requests']] |
8 | 9 |
|
9 | | - def setUp(self): |
10 | | - self._delete_slot() |
11 | 10 |
|
12 | | - def tearDown(self): |
13 | | - self._delete_slot() |
| 11 | +def test_add_read(hsproject): |
| 12 | + frontier = hsproject.frontier |
14 | 13 |
|
15 | | - def _delete_slot(self): |
16 | | - frontier = self.project.frontier |
17 | | - frontier.delete_slot(self.frontier, self.slot) |
| 14 | + fps = [{'fp': '/'}] |
| 15 | + frontier.add(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, fps) |
| 16 | + fps = [{'fp': '/index.html'}, {'fp': '/index2.html'}] |
| 17 | + frontier.add(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, fps) |
| 18 | + frontier.flush() |
18 | 19 |
|
19 | | - def _remove_all_ids(self): |
20 | | - frontier = self.project.frontier |
21 | | - ids = [batch['id'] for batch in frontier.read(self.frontier, self.slot)] |
22 | | - frontier.delete(self.frontier, self.slot, ids) |
| 20 | + urls = [_get_urls(batch) for batch |
| 21 | + in frontier.read(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT)] |
| 22 | + expected_urls = [[u'/', u'/index.html', u'/index2.html']] |
| 23 | + assert urls == expected_urls |
23 | 24 |
|
24 | | - def _get_urls(self, batch): |
25 | | - return [r[0] for r in batch['requests']] |
26 | 25 |
|
27 | | - def test_add_read(self): |
28 | | - frontier = self.project.frontier |
| 26 | +def test_add_multiple_chunks(hsproject): |
| 27 | + frontier = hsproject.frontier |
| 28 | + old_count = frontier.newcount |
29 | 29 |
|
30 | | - fps = [{'fp': '/'}] |
31 | | - frontier.add(self.frontier, self.slot, fps) |
32 | | - fps = [{'fp': '/index.html'}, {'fp': '/index2.html'}] |
33 | | - frontier.add(self.frontier, self.slot, fps) |
34 | | - frontier.flush() |
| 30 | + batch_size = 50 |
| 31 | + fps1 = [{'fp': '/index_%s.html' % fp} for fp in range(0, batch_size)] |
| 32 | + frontier.add(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, fps1) |
35 | 33 |
|
36 | | - urls = [self._get_urls(batch) for batch |
37 | | - in frontier.read(self.frontier, self.slot)] |
38 | | - expected_urls = [[u'/', u'/index.html', u'/index2.html']] |
39 | | - self.assertEqual(urls, expected_urls) |
| 34 | + fps2 = [{'fp': '/index_%s.html' % fp} for fp in range(batch_size, batch_size * 2)] |
| 35 | + frontier.add(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, fps2) |
40 | 36 |
|
41 | | - def test_add_multiple_chunks(self): |
42 | | - frontier = self.project.frontier |
43 | | - old_count = frontier.newcount |
| 37 | + fps3 = [{'fp': '/index_%s.html' % fp} for fp in range(batch_size * 2, batch_size * 3)] |
| 38 | + frontier.add(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, fps3) |
| 39 | + frontier.flush() |
44 | 40 |
|
45 | | - batch_size = 50 |
46 | | - fps1 = [{'fp': '/index_%s.html' % fp} for fp in range(0, batch_size)] |
47 | | - frontier.add(self.frontier, self.slot, fps1) |
| 41 | + assert frontier.newcount == 150 + old_count |
48 | 42 |
|
49 | | - fps2 = [{'fp': '/index_%s.html' % fp} for fp in range(batch_size, batch_size * 2)] |
50 | | - frontier.add(self.frontier, self.slot, fps2) |
| 43 | + # insert repeated fingerprints |
| 44 | + fps4 = [{'fp': '/index_%s.html' % fp} for fp in range(0, batch_size)] |
| 45 | + frontier.add(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, fps3) |
| 46 | + frontier.flush() |
51 | 47 |
|
52 | | - fps3 = [{'fp': '/index_%s.html' % fp} for fp in range(batch_size * 2, batch_size * 3)] |
53 | | - frontier.add(self.frontier, self.slot, fps3) |
54 | | - frontier.flush() |
| 48 | + # new count is the same |
| 49 | + assert frontier.newcount == 150 + old_count |
55 | 50 |
|
56 | | - self.assertEqual(frontier.newcount, 150 + old_count) |
| 51 | + # get first 100 |
| 52 | + batches = list(frontier.read(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, mincount=100)) |
| 53 | + urls = [_get_urls(batch) for batch in batches] |
| 54 | + expected_urls = [[fp['fp'] for fp in fps1 + fps2]] |
| 55 | + assert urls == expected_urls |
57 | 56 |
|
58 | | - # insert repeated fingerprints |
59 | | - fps4 = [{'fp': '/index_%s.html' % fp} for fp in range(0, batch_size)] |
60 | | - frontier.add(self.frontier, self.slot, fps3) |
61 | | - frontier.flush() |
| 57 | + # delete first 100 |
| 58 | + ids = [batch['id'] for batch in batches] |
| 59 | + frontier.delete(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, ids) |
62 | 60 |
|
63 | | - # new count is the same |
64 | | - self.assertEqual(frontier.newcount, 150 + old_count) |
| 61 | + # get remaining 50 |
| 62 | + batches = list(frontier.read(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT)) |
| 63 | + urls = [_get_urls(batch) for batch in batches] |
| 64 | + expected_urls = [[fp['fp'] for fp in fps3]] |
| 65 | + assert urls == expected_urls |
65 | 66 |
|
66 | | - # get first 100 |
67 | | - batches = list(frontier.read(self.frontier, self.slot, mincount=100)) |
68 | | - urls = [self._get_urls(batch) for batch in batches] |
69 | | - expected_urls = [[fp['fp'] for fp in fps1 + fps2]] |
70 | | - self.assertEqual(urls, expected_urls) |
71 | 67 |
|
72 | | - # delete first 100 |
73 | | - ids = [batch['id'] for batch in batches] |
74 | | - frontier.delete(self.frontier, self.slot, ids) |
| 68 | +def test_add_big_chunk(hsproject): |
| 69 | + frontier = hsproject.frontier |
75 | 70 |
|
76 | | - # get remaining 50 |
77 | | - batches = list(frontier.read(self.frontier, self.slot)) |
78 | | - urls = [self._get_urls(batch) for batch in batches] |
79 | | - expected_urls = [[fp['fp'] for fp in fps3]] |
80 | | - self.assertEqual(urls, expected_urls) |
| 71 | + batch_size = 300 |
| 72 | + fps1 = [{'fp': '/index_%s.html' % fp} for fp in range(0, batch_size)] |
| 73 | + frontier.add(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, fps1) |
| 74 | + frontier.flush() |
81 | 75 |
|
82 | | - def test_add_big_chunk(self): |
83 | | - frontier = self.project.frontier |
| 76 | + # get first 100 |
| 77 | + batches = list(frontier.read(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, mincount=100)) |
| 78 | + urls = [_get_urls(batch) for batch in batches] |
| 79 | + expected_urls = [[fp['fp'] for fp in fps1[:100]]] |
| 80 | + assert urls == expected_urls |
84 | 81 |
|
85 | | - batch_size = 300 |
86 | | - fps1 = [{'fp': '/index_%s.html' % fp} for fp in range(0, batch_size)] |
87 | | - frontier.add(self.frontier, self.slot, fps1) |
88 | | - frontier.flush() |
| 82 | + # delete first 100 |
| 83 | + ids = [batch['id'] for batch in batches] |
| 84 | + frontier.delete(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, ids) |
89 | 85 |
|
90 | | - # get first 100 |
91 | | - batches = list(frontier.read(self.frontier, self.slot, mincount=100)) |
92 | | - urls = [self._get_urls(batch) for batch in batches] |
93 | | - expected_urls = [[fp['fp'] for fp in fps1[:100]]] |
94 | | - self.assertEqual(urls, expected_urls) |
| 86 | + # get next 100 |
| 87 | + batches = list(frontier.read(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, mincount=100)) |
| 88 | + urls = [_get_urls(batch) for batch in batches] |
| 89 | + expected_urls = [[fp['fp'] for fp in fps1[100:200]]] |
| 90 | + assert urls == expected_urls |
95 | 91 |
|
96 | | - # delete first 100 |
97 | | - ids = [batch['id'] for batch in batches] |
98 | | - frontier.delete(self.frontier, self.slot, ids) |
| 92 | + # delete next 100 |
| 93 | + ids = [batch['id'] for batch in batches] |
| 94 | + frontier.delete(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, ids) |
99 | 95 |
|
100 | | - # get next 100 |
101 | | - batches = list(frontier.read(self.frontier, self.slot, mincount=100)) |
102 | | - urls = [self._get_urls(batch) for batch in batches] |
103 | | - expected_urls = [[fp['fp'] for fp in fps1[100:200]]] |
104 | | - self.assertEqual(urls, expected_urls) |
| 96 | + # get next 100 |
| 97 | + batches = list(frontier.read(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, mincount=100)) |
| 98 | + urls = [_get_urls(batch) for batch in batches] |
| 99 | + expected_urls = [[fp['fp'] for fp in fps1[200:300]]] |
| 100 | + assert urls == expected_urls |
105 | 101 |
|
106 | | - # delete next 100 |
107 | | - ids = [batch['id'] for batch in batches] |
108 | | - frontier.delete(self.frontier, self.slot, ids) |
109 | | - |
110 | | - # get next 100 |
111 | | - batches = list(frontier.read(self.frontier, self.slot, mincount=100)) |
112 | | - urls = [self._get_urls(batch) for batch in batches] |
113 | | - expected_urls = [[fp['fp'] for fp in fps1[200:300]]] |
114 | | - self.assertEqual(urls, expected_urls) |
115 | | - |
116 | | - def test_add_extra_params(self): |
117 | | - frontier = self.project.frontier |
118 | | - |
119 | | - qdata = {"a": 1, "b": 2, "c": 3} |
120 | | - fps = [{'fp': '/', "qdata": qdata}] |
121 | | - frontier.add(self.frontier, self.slot, fps) |
122 | | - frontier.flush() |
123 | | - |
124 | | - expected_request = [[u'/', {u'a': 1, u'c': 3, u'b': 2}]] |
125 | | - batches = list(frontier.read(self.frontier, self.slot)) |
126 | | - request = batches[0]['requests'] |
127 | | - self.assertEqual(request, expected_request) |
128 | 102 |
|
| 103 | +def test_add_extra_params(hsproject): |
| 104 | + frontier = hsproject.frontier |
| 105 | + |
| 106 | + qdata = {"a": 1, "b": 2, "c": 3} |
| 107 | + fps = [{'fp': '/', "qdata": qdata}] |
| 108 | + frontier.add(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT, fps) |
| 109 | + frontier.flush() |
| 110 | + |
| 111 | + expected_request = [[u'/', {u'a': 1, u'c': 3, u'b': 2}]] |
| 112 | + batches = list(frontier.read(TEST_FRONTIER_NAME, TEST_FRONTIER_SLOT)) |
| 113 | + request = batches[0]['requests'] |
| 114 | + assert request == expected_request |
0 commit comments