
Commit dba96ae

voithsibiryakov authored and committed
Added tests for message_bus_backend (#205)
1 parent 9ff6e76 commit dba96ae

File tree: 3 files changed (+89, −3)


frontera/contrib/backends/remote/messagebus.py

Lines changed: 2 additions & 2 deletions
@@ -31,8 +31,8 @@ def __init__(self, manager):
         self._logger.info("Consuming from partition id %d", self.partition_id)
 
     @classmethod
-    def from_manager(clas, manager):
-        return clas(manager)
+    def from_manager(cls, manager):
+        return cls(manager)
 
     def frontier_start(self):
         pass
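
The only functional change above is renaming the classmethod's first parameter from clas to the conventional cls. As context, a minimal sketch of the construction path this hook serves (simplified and hypothetical; the real FrontierManager does more setup around it):

# Frontera builds components through their from_manager() classmethod,
# so the first parameter must be the class object itself -- hence cls.
from frontera.contrib.backends.remote.messagebus import MessageBusBackend

def build_backend(backend_cls, manager):
    # hypothetical helper: equivalent to backend_cls(manager)
    return backend_cls.from_manager(manager)

# e.g. build_backend(MessageBusBackend, manager)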

tests/mocks/message_bus.py

Lines changed: 5 additions & 1 deletion
@@ -6,6 +6,7 @@ class Consumer(BaseStreamConsumer):
 
     def __init__(self):
         self.messages = []
+        self.offset = None
 
     def put_messages(self, messages=[]):
         self.messages += messages
@@ -19,8 +20,11 @@ def get_messages(self, timeout=0, count=1):
                 break
         return lst
 
+    def _set_offset(self, offset):
+        self.offset = offset
+
     def get_offset(self):
-        pass
+        return self.offset
 
 
 class Producer(object):
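
With these additions the mock consumer remembers an explicit offset instead of always returning None from get_offset(), which lets the new backend tests assert on the offset reported back through the spider log. A minimal stand-alone sketch of the mock's behaviour (hypothetical usage, assuming get_messages() drains up to count queued messages as the break / return lst lines above suggest):

# Sketch of the mock consumer on its own; the real tests drive it
# indirectly through MessageBusBackend.
from tests.mocks.message_bus import Consumer

consumer = Consumer()
consumer.put_messages([b'm1', b'm2'])
consumer._set_offset(0)

batch = consumer.get_messages(count=10)  # returns whatever is queued
assert consumer.get_offset() == 0        # previously this was always None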

tests/test_message_bus_backend.py

Lines changed: 82 additions & 0 deletions
@@ -0,0 +1,82 @@
+from __future__ import absolute_import
+import unittest
+
+from frontera.contrib.backends.remote.messagebus import MessageBusBackend
+from frontera.settings import Settings
+from frontera.core.models import Request, Response
+
+
+r1 = Request('http://www.example.com/', meta={b'domain': {b'fingerprint': b'1'}})
+r2 = Request('http://www.scrapy.org/', meta={b'domain': {b'fingerprint': b'2'}})
+r3 = Request('http://www.test.com/some/page', meta={b'domain': {b'fingerprint': b'3'}})
+
+
+class TestMessageBusBackend(unittest.TestCase):
+
+    def mbb_setup(self, settings=None):
+        manager = type('manager', (object,), {})
+        settings = settings or Settings()
+        settings.MESSAGE_BUS = 'tests.mocks.message_bus.FakeMessageBus'
+        settings.STORE_CONTENT = True
+        manager.settings = settings
+        manager.request_model = Request
+        manager.response_model = Response
+        return MessageBusBackend(manager)
+
+    def test_feed_partitions_less_than_equal_partion_id_and_partion_id_less_than_zero(self):
+        settings = Settings()
+        # test partition_id > feed_partitions
+        settings.SPIDER_PARTITION_ID = 2
+        settings.SPIDER_FEED_PARTITIONS = 1
+        self.assertRaises(ValueError, self.mbb_setup, settings)
+
+        # test partition_id = feed_partitions
+        settings.SPIDER_PARTITION_ID = 1
+        self.assertRaises(ValueError, self.mbb_setup, settings)
+
+        # test partition_id < 0
+        settings.SPIDER_PARTITION_ID = -1
+        self.assertRaises(ValueError, self.mbb_setup, settings)
+
+    def test_add_seeds(self):
+        mbb = self.mbb_setup()
+        mbb.add_seeds([r1, r2, r3])
+        seeds = [mbb._decoder.decode(m)[1][0] for m in mbb.spider_log_producer.messages]
+        self.assertEqual(set([seed.url for seed in seeds]), set([r1.url, r2.url, r3.url]))
+
+    def test_page_crawled(self):
+        mbb = self.mbb_setup()
+        resp = Response(r1.url, body='body', request=r1)
+        mbb.page_crawled(resp)
+        page = mbb._decoder.decode(mbb.spider_log_producer.messages[0])[1]
+        self.assertEqual((page.request.url, page.body), (resp.request.url, b'body'))
+
+    def test_links_extracted(self):
+        mbb = self.mbb_setup()
+        mbb.links_extracted(r1, [r2, r3])
+        requests = [mbb._decoder.decode(m)[1] for m in mbb.spider_log_producer.messages]
+        links = [mbb._decoder.decode(m)[2][0] for m in mbb.spider_log_producer.messages]
+        self.assertEqual(set([r.url for r in requests]), set([r1.url]))
+        self.assertEqual(set([link.url for link in links]), set([r2.url, r3.url]))
+
+    def test_request_error(self):
+        mbb = self.mbb_setup()
+        mbb.request_error(r1, 'error')
+        _, error_request, error_message = mbb._decoder.decode(mbb.spider_log_producer.messages[0])
+        self.assertEqual((error_request.url, error_message), (r1.url, 'error'))
+
+    def test_get_next_requests(self):
+        mbb = self.mbb_setup()
+        encoded_requests = [mbb._encoder.encode_request(r) for r in [r1, r2, r3]]
+        mbb.consumer.put_messages(encoded_requests)
+        mbb.consumer._set_offset(0)
+        requests = set(mbb.get_next_requests(10, overused_keys=[], key_type='domain'))
+        _, partition_id, offset = mbb._decoder.decode(mbb.spider_log_producer.messages[0])
+        self.assertEqual((partition_id, offset), (0, 0))
+        self.assertEqual(set([r.url for r in requests]), set([r1.url, r2.url, r3.url]))
+        requests = set(mbb.get_next_requests(10, overused_keys=[], key_type='domain'))
+        self.assertEqual([r.url for r in requests], [])
+        # test overused keys
+        mbb.consumer.put_messages(encoded_requests)
+        requests = set(mbb.get_next_requests(10, overused_keys=['www.example.com'], key_type='domain'))
+        self.assertEqual(set([r.url for r in requests]), set([r2.url, r3.url]))
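
The new module uses plain unittest, so it can be run with the standard library runner; a sketch, assuming it is executed from the repository root so that the tests.mocks package is importable:

# Equivalent to `python -m unittest tests.test_message_bus_backend`.
import unittest

suite = unittest.defaultTestLoader.loadTestsFromName('tests.test_message_bus_backend')
unittest.TextTestRunner(verbosity=2).run(suite)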
