Skip to content

Commit 6ff37a3

Browse files
committed
Merge pull request #46 from scrapinghub/use-crawler-settings-as-fallback
Frontera configuration using Scrapy settings infrastructure.
2 parents e9040c4 + bf07f80 commit 6ff37a3

File tree

9 files changed

+139
-25
lines changed

9 files changed

+139
-25
lines changed

docs/source/topics/frontier-api.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ class method, using either a string path::
3030
>>> from frontera import FrontierManager
3131
>>> frontier = FrontierManager.from_settings('my_project.frontier.settings')
3232

33-
or a :class:`Settings <frontera.settings.Settings>` object instance::
33+
or a :class:`BaseSettings <frontera.settings.BaseSettings>` object instance::
3434

3535
>>> from frontera import FrontierManager, Settings
3636
>>> settings = Settings()

docs/source/topics/scrapy-integration.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,18 @@ Default: ``None``
8181

8282
A file path pointing to Frontera settings.
8383

84+
85+
Defining frontier settings via Scrapy settings
86+
==============================================
87+
88+
:ref:`Frontier settings <frontier-built-in-frontier-settings>` can also be defined via Scrapy settings.
89+
In this case, the order of precedence will be the following:
90+
91+
1. Settings defined in the file pointed to by FRONTERA_SETTINGS (highest precedence)
92+
2. Settings defined in the Scrapy settings
93+
3. Default frontier settings
94+
95+
8496
.. _Scrapy middlewares: http://doc.scrapy.org/en/latest/topics/downloader-middleware.html
8597
.. _Scrapy settings: http://doc.scrapy.org/en/latest/topics/settings.html
8698
.. _DOWNLOADER_MIDDLEWARES: http://doc.scrapy.org/en/latest/topics/settings.html#std:setting-DOWNLOADER_MIDDLEWARES

frontera/contrib/scrapy/schedulers/frontier.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from time import time
77

88
from frontera.contrib.scrapy.manager import ScrapyFrontierManager
9+
from frontera.contrib.scrapy.settings_adapter import ScrapySettingsAdapter
910

1011
STATS_PREFIX = 'frontera'
1112

@@ -75,12 +76,8 @@ def __init__(self, crawler):
7576
self.stats_manager = StatsManager(crawler.stats)
7677
self._pending_requests = deque()
7778
self.redirect_enabled = crawler.settings.get('REDIRECT_ENABLED')
78-
79-
frontier_settings = crawler.settings.get('FRONTERA_SETTINGS', None)
80-
if not frontier_settings:
81-
log.msg('FRONTERA_SETTINGS not found! Using default Frontera settings...', log.WARNING)
82-
self.frontier = ScrapyFrontierManager(frontier_settings)
83-
79+
settings = ScrapySettingsAdapter(crawler.settings)
80+
self.frontier = ScrapyFrontierManager(settings)
8481
self._delay_on_empty = self.frontier.manager.settings.get('DELAY_ON_EMPTY')
8582
self._delay_next_call = 0.0
8683

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
from frontera.settings import BaseSettings, DefaultSettings
2+
3+
4+
class ScrapySettingsAdapter(BaseSettings):
5+
"""
6+
Wraps the Frontera settings, falling back to Scrapy and default settings
7+
"""
8+
def __init__(self, crawler_settings):
9+
frontera_settings = crawler_settings.get('FRONTERA_SETTINGS', None)
10+
super(ScrapySettingsAdapter, self).__init__(module=frontera_settings)
11+
self._crawler_settings = crawler_settings or {}
12+
self._default_settings = DefaultSettings()
13+
14+
def get(self, key, default_value=None):
15+
val = super(ScrapySettingsAdapter, self).get(key)
16+
if val is not None:
17+
return val
18+
19+
val = self._crawler_settings.get(key)
20+
if val is not None:
21+
return val
22+
23+
return self._default_settings.get(key, default_value)

frontera/core/manager.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from frontera.exceptions import NotConfigured
44
from frontera.utils.misc import load_object
5-
from frontera.settings import Settings
5+
from frontera.settings import Settings, BaseSettings
66
from frontera.core.components import Backend, Middleware
77
from frontera.logger import FrontierLogger
88
from frontera.core import models
@@ -119,11 +119,10 @@ def __init__(self, request_model, response_model, backend, logger, event_log_man
119119
def from_settings(cls, settings=None):
120120
"""
121121
Returns a :class:`FrontierManager <frontera.core.manager.FrontierManager>` instance initialized with \
122-
the passed settings argument. Argument value can either be a string path pointing to settings file or a \
123-
:class:`Settings <frontera.settings.Settings>` object instance. If no settings is given,
122+
the passed settings argument. If no settings are given,
124123
:ref:`frontier default settings <frontier-default-settings>` are used.
125124
"""
126-
manager_settings = Settings(settings)
125+
manager_settings = Settings.object_from(settings)
127126
return FrontierManager(request_model=manager_settings.REQUEST_MODEL,
128127
response_model=manager_settings.RESPONSE_MODEL,
129128
backend=manager_settings.BACKEND,

frontera/settings/__init__.py

Lines changed: 39 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,12 @@
44
import default_settings
55

66

7-
class Settings(object):
7+
class BaseSettings(object):
88
"""
99
An object that holds frontier settings values.
10+
11+
This also defines the base interface for all classes that are to be used
12+
as settings in frontera.
1013
"""
1114
def __init__(self, module=None, attributes=None):
1215
"""
@@ -15,7 +18,6 @@ def __init__(self, module=None, attributes=None):
1518
1619
"""
1720
self.attributes = {}
18-
self.add_module(default_settings)
1921
if module:
2022
self.add_module(module)
2123
if attributes:
@@ -25,9 +27,24 @@ def __init__(self, module=None, attributes=None):
2527
def from_params(cls, **kwargs):
2628
return cls(attributes=kwargs)
2729

30+
@classmethod
31+
def object_from(cls, settings):
32+
"""
33+
Returns the given settings object as-is, or builds a new one from a settings
34+
file.
35+
36+
`settings` can either be a string path pointing to settings file or a \
37+
:class:`BaseSettings <frontera.settings.BaseSettings>` object instance.
38+
"""
39+
if isinstance(settings, BaseSettings):
40+
return settings
41+
else:
42+
return cls(settings)
43+
2844
def __getattr__(self, name):
29-
if name.isupper() and name in self.attributes:
30-
return self.attributes[name]
45+
val = self.get(name)
46+
if val is not None:
47+
return val
3148
else:
3249
return self.__dict__[name]
3350

@@ -38,15 +55,11 @@ def __setattr__(self, name, value):
3855
self.__dict__[name] = value
3956

4057
def add_module(self, module):
41-
if isinstance(module, Settings):
42-
for name, value in module.attributes.items():
43-
self.set(name, value)
44-
else:
45-
if isinstance(module, six.string_types):
46-
module = import_module(module)
47-
for key in dir(module):
48-
if key.isupper():
49-
self.set(key, getattr(module, key))
58+
if isinstance(module, six.string_types):
59+
module = import_module(module)
60+
for key in dir(module):
61+
if key.isupper():
62+
self.set(key, getattr(module, key))
5063

5164
def get(self, key, default_value=None):
5265
if not key.isupper():
@@ -60,3 +73,16 @@ def set(self, key, value):
6073
def set_from_dict(self, attributes):
6174
for name, value in attributes.items():
6275
self.set(name, value)
76+
77+
78+
class DefaultSettings(BaseSettings):
79+
def __init__(self):
80+
super(DefaultSettings, self).__init__(default_settings)
81+
82+
83+
class Settings(BaseSettings):
84+
def __init__(self, module=None, attributes=None):
85+
super(Settings, self).__init__(default_settings, attributes)
86+
87+
if module:
88+
self.add_module(module)
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# -*- coding: utf-8 -*-
2+
3+
from frontera.contrib.scrapy.settings_adapter import ScrapySettingsAdapter
4+
5+
6+
def test_fallsback_to_crawler_settings():
7+
settings = ScrapySettingsAdapter({'DELAY_ON_EMPTY': 10})
8+
assert settings.get('DELAY_ON_EMPTY') == 10
9+
10+
11+
def test_frontera_settings_have_precedence_over_crawler_settings():
12+
crawler_settings = {'MAX_REQUESTS': 10,
13+
'FRONTERA_SETTINGS': 'frontera.tests.scrapy_spider.frontera.settings'}
14+
settings = ScrapySettingsAdapter(crawler_settings)
15+
assert settings.get('MAX_REQUESTS') == 5

frontera/tests/test_scrapy_spider.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,4 @@ def test_scrapy_spider():
2020

2121
stats = crawler.stats.spider_stats['example']
2222
assert stats['frontera/crawled_pages_count'] == 5
23-
assert spider.callback_calls > 0
23+
assert spider.callback_calls > 0

frontera/tests/test_settings.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# -*- coding: utf-8 -*-
2+
3+
from frontera.settings import Settings, BaseSettings
4+
5+
6+
def test_settings_on_a_python_module_are_loaded():
7+
settings = Settings('frontera.tests.scrapy_spider.frontera.settings')
8+
assert settings.get('MAX_REQUESTS') == 5
9+
10+
11+
def test_settings_passed_as_attributes_can_be_found():
12+
settings = Settings(attributes={'SETTING': 'value'})
13+
assert settings.get('SETTING') == 'value'
14+
15+
16+
def test_fallsback_to_frontera_default_settings():
17+
settings = Settings()
18+
assert settings.get('MAX_NEXT_REQUESTS') == 0
19+
20+
21+
def test_allows_settings_to_be_accessed_by_attribute():
22+
settings = Settings()
23+
assert settings.MAX_NEXT_REQUESTS == 0
24+
25+
26+
def test_settings_attributes_can_be_assigned():
27+
settings = Settings()
28+
settings.NEW_ATTRIBUTE = 10
29+
assert settings.NEW_ATTRIBUTE == 10
30+
31+
32+
def test_object_from_loads_settings_from_a_module():
33+
module = 'frontera.tests.scrapy_spider.frontera.settings'
34+
settings = BaseSettings.object_from(module)
35+
assert settings.get('MAX_REQUESTS') == 5
36+
37+
38+
def test_new_instance_copies_the_given_instance():
39+
settings = Settings()
40+
new_instance = BaseSettings.object_from(settings)
41+
assert new_instance.MAX_NEXT_REQUESTS == 0
42+
assert type(new_instance) == Settings

0 commit comments

Comments
 (0)