diff --git a/frontera/logger/filters/__init__.py b/frontera/logger/filters/__init__.py index 04a3b4bc2..92784b51d 100644 --- a/frontera/logger/filters/__init__.py +++ b/frontera/logger/filters/__init__.py @@ -1,18 +1,13 @@ from __future__ import absolute_import import logging import six - - -def format_str(s): - if isinstance(s, six.text_type): - return s.encode('ascii', 'ignore') - return str(s) +from w3lib.util import to_native_str class PlainValuesFilter(logging.Filter): def __init__(self, separator=None, excluded_fields=None, msg_max_length=0): super(PlainValuesFilter, self).__init__() - self.separator = separator or " " + self.separator = to_native_str(separator or " ") self.excluded_fields = excluded_fields or [] self.msg_max_length = msg_max_length @@ -20,8 +15,8 @@ def filter(self, record): if isinstance(record.msg, dict): for field_name in self.excluded_fields: setattr(record, field_name, record.msg.get(field_name, '')) - record.msg = self.separator.join([format_str(value) - for key, value in record.msg.items() + record.msg = self.separator.join([to_native_str(value) + for key, value in six.iteritems(record.msg) if key not in self.excluded_fields]) if self.msg_max_length and len(record.msg) > self.msg_max_length: record.msg = record.msg[0:self.msg_max_length-3] + "..." diff --git a/frontera/logger/formatters/__init__.py b/frontera/logger/formatters/__init__.py index d57ae0f09..a9f687044 100644 --- a/frontera/logger/formatters/__init__.py +++ b/frontera/logger/formatters/__init__.py @@ -1,10 +1,7 @@ from __future__ import absolute_import import logging -from .text import DETAILED, SHORT - LOG_FORMAT = "[%(name)s] %(message)s" -LOG_EVENT_FORMAT = "%(asctime)s %(event)-16s %(message)s" try: from .color import ColorFormatter @@ -18,41 +15,9 @@ "CRITICAL": "bold_purple", } - EVENTS = ColorFormatter( - format="%(log_color)s"+LOG_EVENT_FORMAT, - log_colors={ - "FRONTIER_START": "bold_yellow", - "FRONTIER_STOP": "bold_yellow", - "ADD_SEED": "cyan", - "ADD_SEEDS": "cyan", - "PAGE_CRAWLED": "blue", - "PAGE_CRAWLED_ERROR": "red", - "GET_NEXT_PAGES": "purple", - }, - log_color_field="event") - CONSOLE = ColorFormatter( format=LOG_COLOR_FORMAT, log_colors=COLORS.copy(), log_color_field="levelname") - - CONSOLE_MANAGER = ColorFormatter( - format=LOG_COLOR_FORMAT, - log_colors=COLORS.copy(), - log_color_field="levelname") - - CONSOLE_BACKEND = ColorFormatter( - format=LOG_COLOR_FORMAT, - log_colors=COLORS.copy(), - log_color_field="levelname") - - CONSOLE_DEBUGGING = ColorFormatter( - format=LOG_COLOR_FORMAT, - log_colors=COLORS.copy(), - log_color_field="levelname") except ImportError: - EVENTS = logging.Formatter(fmt=LOG_EVENT_FORMAT) CONSOLE = logging.Formatter(fmt=LOG_FORMAT) - CONSOLE_MANAGER = logging.Formatter(fmt=LOG_FORMAT) - CONSOLE_BACKEND = logging.Formatter(fmt=LOG_FORMAT) - CONSOLE_DEBUGGING = logging.Formatter(fmt=LOG_FORMAT) diff --git a/frontera/logger/formatters/json.py b/frontera/logger/formatters/json.py index 07510ee31..3efe1f24c 100644 --- a/frontera/logger/formatters/json.py +++ b/frontera/logger/formatters/json.py @@ -1,21 +1,7 @@ from __future__ import absolute_import from pythonjsonlogger.jsonlogger import JsonFormatter - -import datetime -from json import JSONEncoder - - -class DateTimeEncoder(JSONEncoder): - def default(self, obj): - if isinstance(obj, datetime.datetime): - return obj.isoformat() - elif isinstance(obj, datetime.date): - return obj.isoformat() - elif isinstance(obj, datetime.timedelta): - return (datetime.datetime.min + obj).time().isoformat() - else: - return super(DateTimeEncoder, self).default(obj) +from frontera.utils.encoders import DateTimeEncoder class JSONFormatter(JsonFormatter): diff --git a/frontera/logger/formatters/text.py b/frontera/logger/formatters/text.py deleted file mode 100644 index a3504b05a..000000000 --- a/frontera/logger/formatters/text.py +++ /dev/null @@ -1,5 +0,0 @@ -from __future__ import absolute_import -import logging - -DETAILED = logging.Formatter(fmt="%(asctime)s - %(name)s - %(levelname)s - %(message)s") -SHORT = logging.Formatter(fmt="[%(name)-8s] %(levelname)7s: %(message)s") diff --git a/frontera/logger/handlers/__init__.py b/frontera/logger/handlers/__init__.py index c6e211335..811822dcd 100644 --- a/frontera/logger/handlers/__init__.py +++ b/frontera/logger/handlers/__init__.py @@ -2,22 +2,7 @@ import sys import logging -from frontera.logger import filters, formatters - - -EVENTS = logging.StreamHandler(stream=sys.stdout) -EVENTS.setFormatter(formatters.EVENTS) -EVENTS.setLevel(logging.INFO) -EVENTS.filters.append(filters.PLAINVALUES(excluded_fields=['event'])) +from frontera.logger import formatters CONSOLE = logging.StreamHandler(stream=sys.stdout) CONSOLE.setFormatter(formatters.CONSOLE) - -CONSOLE_MANAGER = logging.StreamHandler(stream=sys.stdout) -CONSOLE_MANAGER.setFormatter(formatters.CONSOLE_MANAGER) - -CONSOLE_BACKEND = logging.StreamHandler(stream=sys.stdout) -CONSOLE_BACKEND.setFormatter(formatters.CONSOLE_BACKEND) - -CONSOLE_DEBUGGING = logging.StreamHandler(stream=sys.stdout) -CONSOLE_DEBUGGING.setFormatter(formatters.CONSOLE_DEBUGGING) diff --git a/frontera/logger/handlers/redis.py b/frontera/logger/handlers/redis.py deleted file mode 100644 index b79d7ac1f..000000000 --- a/frontera/logger/handlers/redis.py +++ /dev/null @@ -1,51 +0,0 @@ -from __future__ import absolute_import - -import logging -import redis - - -class RedisListHandler(logging.Handler): - """ - Modified from https://github.com/jedp/python-redis-log - - Publish messages to redis a redis list. - - As a convenience, the classmethod to() can be used as a - constructor, just as in Andrei Savu's mongodb-log handler. - - If max_messages is set, trim the list to this many items. - """ - - @classmethod - def to(cls, list_name, max_messages=None, host='localhost', port=6379, level=logging.NOTSET): - return cls(list_name, max_messages, redis.Redis(host=host, port=port), level=level) - - def __init__(self, list_name, formatter=None, filters=None, max_messages=None, - redis_client=None, host='localhost', port=6379, - level=logging.NOTSET): - """ - Create a new logger for the given key and redis_client. - """ - logging.Handler.__init__(self, level) - self.list_name = list_name - self.redis_client = redis_client if redis_client else redis.Redis(host=host, port=port) - self.max_messages = max_messages - if formatter: - self.formatter = formatter - if filters: - self.filters = filters - - def emit(self, record): - """ - Publish record to redis logging list - """ - try: - if self.max_messages: - p = self.redis_client.pipeline() - p.rpush(self.list_name, self.format(record)) - p.ltrim(self.list_name, -self.max_messages, -1) - p.execute() - else: - self.redis_client.rpush(self.list_name, self.format(record)) - except redis.RedisError: - pass diff --git a/requirements/logging.txt b/requirements/logging.txt index 3f4270d74..48b90f75c 100644 --- a/requirements/logging.txt +++ b/requirements/logging.txt @@ -1 +1,2 @@ colorlog>=2.4.0 +python-json-logger>=0.1.5 diff --git a/requirements/tests.txt b/requirements/tests.txt index 9a34088ea..391093b9a 100644 --- a/requirements/tests.txt +++ b/requirements/tests.txt @@ -12,3 +12,4 @@ pytest-cov happybase>=1.0.0 mock boto>=2.42.0 +-r logging.txt diff --git a/setup.py b/setup.py index 3d834fb14..11c9256da 100644 --- a/setup.py +++ b/setup.py @@ -54,6 +54,7 @@ ], 'logging': [ 'colorlog>=2.4.0', + 'python-json-logger>=0.1.5' ], 'tldextract': [ 'tldextract>=1.5.1', @@ -82,6 +83,8 @@ "SQLAlchemy>=1.0.0", "cachetools", "mock", - "boto>=2.42.0" + "boto>=2.42.0", + "colorlog>=2.4.0", + "python-json-logger>=0.1.5" ] ) diff --git a/tests/test_filters.py b/tests/test_filters.py new file mode 100644 index 000000000..796faecdb --- /dev/null +++ b/tests/test_filters.py @@ -0,0 +1,122 @@ +import unittest + +from frontera.logger.filters import PLAINVALUES, INCLUDEFIELDS, EXCLUDEFIELDS +from tests.utils import LoggingCaptureMixin, SetupDefaultLoggingMixin + + +class BaseTestFilters(SetupDefaultLoggingMixin, LoggingCaptureMixin, unittest.TestCase): + def tearDown(self): + super(BaseTestFilters, self).setUp() + self.logger.handlers[0].filters = [] + + def addFilter(self, filter): + self.logger.handlers[0].addFilter(filter) + + +class TestFilterPlainValues(BaseTestFilters): + def test_plain_values_exclude_fields(self): + filter = PLAINVALUES(excluded_fields=['event']) + self.addFilter(filter) + self.logger.debug({'message1': 'logging', 'message2': 'debug', 'event': 'value'}) + log_msg = self.logger_output.getvalue() + assert log_msg == 'logging debug\n' or log_msg == 'debug logging\n' + + def test_plain_values_separator(self): + filter = PLAINVALUES(separator=',') + self.addFilter(filter) + self.logger.debug({'message1': 'logging', 'message2': 'debug'}) + log_msg = self.logger_output.getvalue() + assert log_msg == 'logging,debug\n' or log_msg == 'debug,logging\n' + + def test_plain_values_msg_max_length(self): + filter = PLAINVALUES(msg_max_length=10) + self.addFilter(filter) + self.logger.debug({'message1': '1' * 10, 'message2': '2' * 10}) + log_msg = self.logger_output.getvalue() + assert log_msg == '%s...\n' % ('1' * 7) or log_msg == '%s...\n' % ('2' * 7) + + def test_plain_values_str_msg(self): + filter = PLAINVALUES(msg_max_length=10) + self.addFilter(filter) + self.logger.debug('debug message') + self.assertEqual(self.logger_output.getvalue(), 'debug message\n') + + +class TestIncludeFields(BaseTestFilters): + def test_include_fields_matching_values(self): + filter = INCLUDEFIELDS(field_name='event', included_values=['page_crawled']) + self.addFilter(filter) + self.logger.debug('crawled page P', extra={'event': 'page_crawled'}) + self.assertEqual(self.logger_output.getvalue(), 'crawled page P\n') + + def test_include_fields_non_matching_values(self): + filter = INCLUDEFIELDS(field_name='event', included_values=['links_extracted']) + self.addFilter(filter) + self.logger.debug('crawled page P', extra={'event': 'page_crawled'}) + self.assertEqual(self.logger_output.getvalue(), '') + + def test_include_fields_dict_msg_matching_values(self): + filter = INCLUDEFIELDS(field_name='event', included_values=['page_crawled']) + self.addFilter(filter) + self.logger.debug({'message': 'debug message', 'event': 'page_crawled'}) + log_msg = self.logger_output.getvalue() + assert log_msg == "{'event': 'page_crawled', 'message': 'debug message'}\n" or \ + log_msg == "{'message': 'debug message', 'event': 'page_crawled'}\n" + + def test_include_fields_dict_msg_non_matching_values(self): + filter = INCLUDEFIELDS(field_name='event', included_values=['links_extracted']) + self.addFilter(filter) + self.logger.debug({'message': 'debug message', 'event': 'page_crawled'}) + self.assertEqual(self.logger_output.getvalue(), '') + + def test_include_fields_field_name_none(self): + filter = INCLUDEFIELDS(field_name=None, included_values=[]) + self.addFilter(filter) + self.logger.debug('debug message') + self.assertEqual(self.logger_output.getvalue(), 'debug message\n') + + def test_include_fields_list_message(self): + filter = INCLUDEFIELDS(field_name='event', included_values=['page_crawled']) + self.addFilter(filter) + self.logger.debug(['debug message']) + self.assertEqual(self.logger_output.getvalue(), "['debug message']\n") + + +class TestExcludeFields(BaseTestFilters): + def test_exclude_fields_matching_values(self): + filter = EXCLUDEFIELDS(field_name='event', excluded_fields=['page_crawled']) + self.addFilter(filter) + self.logger.debug('crawled page P', extra={'event': 'page_crawled'}) + self.assertEqual(self.logger_output.getvalue(), '') + + def test_exclude_fields_non_matching_values(self): + filter = EXCLUDEFIELDS(field_name='event', excluded_fields=['links_extracted']) + self.addFilter(filter) + self.logger.debug('crawled page P', extra={'event': 'page_crawled'}) + self.assertEqual(self.logger_output.getvalue(), 'crawled page P\n') + + def test_exclude_fields_dict_msg_matching_values(self): + filter = EXCLUDEFIELDS(field_name='event', excluded_fields='page_crawled') + self.addFilter(filter) + self.logger.debug({'message': 'debug message', 'event': 'page_crawled'}) + self.assertEqual(self.logger_output.getvalue(), '') + + def test_exclude_fields_dict_msg_non_matching_values(self): + filter = EXCLUDEFIELDS(field_name='event', excluded_fields='links_extracted') + self.addFilter(filter) + self.logger.debug({'message': 'debug message', 'event': 'page_crawled'}) + log_msg = self.logger_output.getvalue() + assert log_msg == "{'event': 'page_crawled', 'message': 'debug message'}\n" or \ + log_msg == "{'message': 'debug message', 'event': 'page_crawled'}\n" + + def test_include_fields_field_name_none(self): + filter = EXCLUDEFIELDS(field_name=None, excluded_fields=[]) + self.addFilter(filter) + self.logger.debug('debug message') + self.assertEqual(self.logger_output.getvalue(), 'debug message\n') + + def test_include_fields_list_message(self): + filter = EXCLUDEFIELDS(field_name='event', excluded_fields=['page_crawled']) + self.addFilter(filter) + self.logger.debug(['debug message']) + self.assertEqual(self.logger_output.getvalue(), "['debug message']\n") diff --git a/tests/test_formatters.py b/tests/test_formatters.py new file mode 100644 index 000000000..065fab2fd --- /dev/null +++ b/tests/test_formatters.py @@ -0,0 +1,116 @@ +import unittest +import re +import json +import datetime + +from frontera.logger.formatters.color import ColorFormatter +from frontera.logger.formatters.json import JSONFormatter +from frontera.logger.formatters import CONSOLE + +from tests.utils import LoggingCaptureMixin, SetupDefaultLoggingMixin, colors + + +class BaseTestFormatters(SetupDefaultLoggingMixin, LoggingCaptureMixin, unittest.TestCase): + def setUp(self): + super(BaseTestFormatters, self).setUp() + self.default_formatter = self.logger.handlers[0].formatter + + def tearDown(self): + super(BaseTestFormatters, self).setUp() + self.logger.handlers[0].formatter = self.default_formatter + + def setFormatter(self, formatter): + self.logger.handlers[0].setFormatter(formatter) + + +class TestFormatterColor(BaseTestFormatters): + + def test_formatter_color(self): + c = ColorFormatter( + format="%(log_color)s [%(name)s] %(message)s", + log_colors={ + "DEBUG": "white", + "INFO": "green", + "ERROR": "red", + }, + log_color_field="levelname") + self.setFormatter(c) + self.logger.debug('debug message') + self.logger.info('info message') + self.logger.error('error message') + self.assertEqual(self.logger_output.getvalue(), + '{white} [frontera] debug message{reset}\n' + '{green} [frontera] info message{reset}\n' + '{red} [frontera] error message{reset}\n'.format(white=colors['white'], + green=colors['green'], + red=colors['red'], + reset=colors['reset'])) + + def test_formatter_color_datefmt(self): + c = ColorFormatter( + format="%(log_color)s %(asctime)s [%(name)s] %(message)s", + log_colors={ + "DEBUG": "white", + "INFO": "green", + "ERROR": "red", + }, + datefmt='%d-%m-%Y %H:%M:%S', + log_color_field="levelname") + self.setFormatter(c) + self.logger.debug('debug message') + self.assertRegexpMatches(self.logger_output.getvalue(), + '{white} \d{{2}}-\d{{2}}-\d{{4}} \d{{2}}:\d{{2}}:\d{{2}} ' + '\\[frontera\\] debug message{reset}\n'.format( + white=re.escape(colors['white']), + reset=re.escape(colors['reset']))) + + +class TestFormatterJson(BaseTestFormatters): + + def setUp(self): + super(TestFormatterJson, self).setUp() + self.setFormatter(JSONFormatter()) + + def test_formatter_json_log_text(self): + self.logger.debug('debug message') + self.assertEqual(json.loads(self.logger_output.getvalue())['message'], 'debug message') + + def test_formatter_json_log_dict(self): + dct_msg = { + 'message': 'debug message', + 'extra': 'value', + } + self.logger.debug(dct_msg) + json_log = json.loads(self.logger_output.getvalue()) + self.assertEqual(json_log.get('message'), 'debug message') + self.assertEqual(json_log.get('extra'), 'value') + + def test_formatter_json_log_datetime_objects(self): + dct_msg = { + 'message': 'debug message', + 'datetime': datetime.datetime(2016, 9, 19, 23, 59), + 'date': datetime.date(2016, 9, 20), + 'timedelta': datetime.datetime(2016, 9, 19, 23, 59) - datetime.datetime(2016, 9, 19, 23, 50), + } + self.logger.debug(dct_msg) + json_log = json.loads(self.logger_output.getvalue()) + self.assertEqual(json_log.get('message'), 'debug message') + self.assertEqual(json_log.get('datetime'), '2016-09-19T23:59:00') + self.assertEqual(json_log.get('date'), '2016-09-20') + self.assertEqual(json_log.get('timedelta'), '00:09:00') + + +class TestFormatterMiscellaneous(BaseTestFormatters): + + def test_formatter_console(self): + self.setFormatter(CONSOLE) + self.logger.debug('debug message') + self.logger.info('info message') + self.logger.error('error message') + self.assertEqual(self.logger_output.getvalue(), + '{white}[frontera] debug message{reset}\n' + '{green}[frontera] info message{reset}\n' + '{red}[frontera] error message{reset}\n'.format(white=colors['white'], + green=colors['green'], + red=colors['red'], + reset=colors['reset'])) diff --git a/tests/test_handlers.py b/tests/test_handlers.py new file mode 100644 index 000000000..deda7ba9d --- /dev/null +++ b/tests/test_handlers.py @@ -0,0 +1,30 @@ +import unittest +import logging +import logging.config + +from frontera.logger.handlers import CONSOLE +from tests.utils import SetupDefaultLoggingMixin, LoggingCaptureMixin, colors + + +class SetupHandler(SetupDefaultLoggingMixin): + @classmethod + def setUpClass(cls): + super(SetupHandler, cls).setUpClass() + l = logging.getLogger('frontera') + l.handlers[0] = cls.handler + + +class TestHandlerConsole(SetupHandler, LoggingCaptureMixin, unittest.TestCase): + handler = CONSOLE + + def test_handler_color_based_on_level(self): + self.logger.debug('debug message') + self.logger.info('info message') + self.logger.error('error message') + self.assertEqual(self.logger_output.getvalue(), + '{white}[frontera] debug message{reset}\n' + '{green}[frontera] info message{reset}\n' + '{red}[frontera] error message{reset}\n'.format(white=colors['white'], + green=colors['green'], + red=colors['red'], + reset=colors['reset'])) diff --git a/tests/utils.py b/tests/utils.py new file mode 100644 index 000000000..bea9fe690 --- /dev/null +++ b/tests/utils.py @@ -0,0 +1,61 @@ +import six +import logging +import logging.config + +colors = { + 'bold_yellow': '\x1b[01;33m', + 'green': '\x1b[32m', + 'red': '\x1b[31m', + 'reset': '\x1b[0m', + 'white': '\x1b[37m', +} + +DEFAULT_LOGGING = { + 'version': 1, + 'disable_existing_loggers': False, + 'formatters': { + 'message': { + 'format': '%(message)s' + } + }, + 'handlers': { + 'console': { + 'level': 'DEBUG', + 'class': 'logging.StreamHandler', + 'formatter': 'message', + } + }, + 'loggers': { + 'frontera': { + 'handlers': ['console'], + 'level': 'DEBUG', + }, + } +} + + +class LoggingCaptureMixin(object): + """ + Capture the output from the 'frontera' logger and store it on the class's + logger_output attribute. + """ + + def setUp(self): + self.logger = logging.getLogger('frontera') + self.old_stream = self.logger.handlers[0].stream + self.logger_output = six.StringIO() + self.logger.handlers[0].stream = self.logger_output + + def tearDown(self): + self.logger.handlers[0].stream = self.old_stream + + +class SetupDefaultLoggingMixin(object): + @classmethod + def setUpClass(cls): + super(SetupDefaultLoggingMixin, cls).setUpClass() + logging.config.dictConfig(DEFAULT_LOGGING) + + @classmethod + def tearDownClass(cls): + super(SetupDefaultLoggingMixin, cls).tearDownClass()