from __future__ import absolute_import
import unittest

from frontera.contrib.backends.remote.messagebus import MessageBusBackend
from frontera.settings import Settings
from frontera.core.models import Request, Response


r1 = Request('http://www.example.com/', meta={b'domain': {b'fingerprint': b'1'}})
r2 = Request('http://www.scrapy.org/', meta={b'domain': {b'fingerprint': b'2'}})
r3 = Request('http://www.test.com/some/page', meta={b'domain': {b'fingerprint': b'3'}})


class TestMessageBusBackend(unittest.TestCase):

    def mbb_setup(self, settings=None):
        # Build a MessageBusBackend wired to the FakeMessageBus mock,
        # using a bare class as a minimal stand-in for the frontier manager.
        manager = type('manager', (object,), {})
        settings = settings or Settings()
        settings.MESSAGE_BUS = 'tests.mocks.message_bus.FakeMessageBus'
        settings.STORE_CONTENT = True
        manager.settings = settings
        manager.request_model = Request
        manager.response_model = Response
        return MessageBusBackend(manager)

    def test_feed_partitions_less_than_equal_partition_id_and_partition_id_less_than_zero(self):
        settings = Settings()
        # test partition_id > feed_partitions
        settings.SPIDER_PARTITION_ID = 2
        settings.SPIDER_FEED_PARTITIONS = 1
        self.assertRaises(ValueError, self.mbb_setup, settings)

        # test partition_id = feed_partitions
        settings.SPIDER_PARTITION_ID = 1
        self.assertRaises(ValueError, self.mbb_setup, settings)

        # test partition_id < 0
        settings.SPIDER_PARTITION_ID = -1
        self.assertRaises(ValueError, self.mbb_setup, settings)

    def test_add_seeds(self):
        mbb = self.mbb_setup()
        mbb.add_seeds([r1, r2, r3])
        seeds = [mbb._decoder.decode(m)[1][0] for m in mbb.spider_log_producer.messages]
        self.assertEqual(set([seed.url for seed in seeds]), set([r1.url, r2.url, r3.url]))

    def test_page_crawled(self):
        mbb = self.mbb_setup()
        # use a bytes body so it matches the b'body' expected after the encode/decode round trip
        resp = Response(r1.url, body=b'body', request=r1)
        mbb.page_crawled(resp)
        page = mbb._decoder.decode(mbb.spider_log_producer.messages[0])[1]
        self.assertEqual((page.request.url, page.body), (resp.request.url, b'body'))

    def test_links_extracted(self):
        mbb = self.mbb_setup()
        mbb.links_extracted(r1, [r2, r3])
        requests = [mbb._decoder.decode(m)[1] for m in mbb.spider_log_producer.messages]
        links = [mbb._decoder.decode(m)[2][0] for m in mbb.spider_log_producer.messages]
        self.assertEqual(set([r.url for r in requests]), set([r1.url]))
        self.assertEqual(set([link.url for link in links]), set([r2.url, r3.url]))

    def test_request_error(self):
        mbb = self.mbb_setup()
        mbb.request_error(r1, 'error')
        _, error_request, error_message = mbb._decoder.decode(mbb.spider_log_producer.messages[0])
        self.assertEqual((error_request.url, error_message), (r1.url, 'error'))

    def test_get_next_requests(self):
        mbb = self.mbb_setup()
        encoded_requests = [mbb._encoder.encode_request(r) for r in [r1, r2, r3]]
        mbb.consumer.put_messages(encoded_requests)
        mbb.consumer._set_offset(0)
        requests = set(mbb.get_next_requests(10, overused_keys=[], key_type='domain'))
        # after fetching, the backend reports its consumer offset on the spider log
        _, partition_id, offset = mbb._decoder.decode(mbb.spider_log_producer.messages[0])
        self.assertEqual((partition_id, offset), (0, 0))
        self.assertEqual(set([r.url for r in requests]), set([r1.url, r2.url, r3.url]))
        requests = set(mbb.get_next_requests(10, overused_keys=[], key_type='domain'))
        self.assertEqual([r.url for r in requests], [])
        # test overused keys
        mbb.consumer.put_messages(encoded_requests)
        requests = set(mbb.get_next_requests(10, overused_keys=['www.example.com'], key_type='domain'))
        self.assertEqual(set([r.url for r in requests]), set([r2.url, r3.url]))