Skip to content

[Logging] Add fuzz-related logs context to corpus pruning task #4774

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 29 additions & 3 deletions src/clusterfuzz/_internal/bot/tasks/utasks/corpus_pruning_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -1022,7 +1022,7 @@ def _extract_coverage_information(context, result):
return coverage_info


def utask_main(uworker_input):
def _utask_main(uworker_input):
"""Execute corpus pruning task."""
fuzz_target = uworker_io.entity_from_protobuf(
uworker_input.corpus_pruning_task_input.fuzz_target,
Expand Down Expand Up @@ -1073,6 +1073,16 @@ def utask_main(uworker_input):
return uworker_output


def utask_main(uworker_input):
"""Sets logs context and executes corpus pruning task."""
fuzz_target = uworker_io.entity_from_protobuf(
uworker_input.corpus_pruning_task_input.fuzz_target,
data_types.FuzzTarget)
with logs.fuzz_log_context(uworker_input.fuzzer_name, uworker_input.job_type,
fuzz_target):
return _utask_main(uworker_input)


def handle_corpus_pruning_failures(output: uworker_msg_pb2.Output): # pylint: disable=no-member
task_name = (f'corpus_pruning_{output.uworker_input.fuzzer_name}_'
f'{output.uworker_input.job_type}')
Expand Down Expand Up @@ -1104,7 +1114,7 @@ def _create_backup_urls(fuzz_target: data_types.FuzzTarget,
corpus_pruning_task_input.dated_backup_signed_url = dated_backup_signed_url


def utask_preprocess(fuzzer_name, job_type, uworker_env):
def _utask_preprocess(fuzzer_name, job_type, uworker_env):
"""Runs preprocessing for corpus pruning task."""
fuzz_target = data_handler.get_fuzz_target(fuzzer_name)

Expand Down Expand Up @@ -1170,6 +1180,13 @@ def utask_preprocess(fuzzer_name, job_type, uworker_env):
corpus_pruning_task_input=corpus_pruning_task_input)


def utask_preprocess(fuzzer_name, job_type, uworker_env):
"""Sets logs context and runs preprocessing for corpus pruning task."""
fuzz_target = data_handler.get_fuzz_target(fuzzer_name)
with logs.fuzz_log_context(fuzzer_name, job_type, fuzz_target):
return _utask_preprocess(fuzzer_name, job_type, uworker_env)


_ERROR_HANDLER = uworker_handle_errors.CompositeErrorHandler({
uworker_msg_pb2.ErrorType.CORPUS_PRUNING_FUZZER_SETUP_FAILED: # pylint: disable=no-member
uworker_handle_errors.noop_handler,
Expand Down Expand Up @@ -1198,7 +1215,7 @@ def _update_latest_backup(output):
f'{latest_backup_gcs_url}.')


def utask_postprocess(output):
def _utask_postprocess(output):
"""Trusted: Handles errors and writes anything needed to the db."""
if output.error_type != uworker_msg_pb2.ErrorType.NO_ERROR: # pylint: disable=no-member
_ERROR_HANDLER.handle(output)
Expand All @@ -1211,3 +1228,12 @@ def utask_postprocess(output):
_save_coverage_information(output)
_process_corpus_crashes(output)
data_handler.update_task_status(task_name, data_types.TaskState.FINISHED)


def utask_postprocess(output):
"""Sets logs context and runs postprocess for corpus pruning task."""
fuzzer_name = output.uworker_input.fuzzer_name
job_type = output.uworker_input.job_type
fuzz_target = data_handler.get_fuzz_target(fuzzer_name)
with logs.fuzz_log_context(fuzzer_name, job_type, fuzz_target):
return _utask_postprocess(output)
85 changes: 62 additions & 23 deletions src/clusterfuzz/_internal/metrics/logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -695,21 +695,25 @@ class TaskLogStruct(NamedTuple):
stage: str


class TestcaseLogStruct(NamedTuple):
testcase_id: str
class FuzzLogStruct(NamedTuple):
fuzz_target: str
job: str
fuzzer: str


class TestcaseLogStruct(NamedTuple):
testcase_id: str


class LogContextType(enum.Enum):
"""Log context types
This is the way to define the context for a given entrypoint
and this context is used for define the adicional labels
to be added to the log.
"""Log context types.

This is the way to define the context for a given entrypoint and this
context is used for define the adicional labels to be added to the log.
"""
COMMON = 'common'
TASK = 'task'
FUZZ = 'fuzz'
TESTCASE = 'testcase'

def get_extras(self) -> NamedTuple:
Expand Down Expand Up @@ -750,6 +754,18 @@ def get_extras(self) -> NamedTuple:
error('Error retrieving context for task logs.', ignore_context=True)
return GenericLogStruct()

if self == LogContextType.FUZZ:
try:
return FuzzLogStruct(
fuzzer=log_contexts.meta.get('fuzzer_name', 'null'),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WHy not return None instead of "null"?

Copy link
Collaborator Author

@ViniciustCosta ViniciustCosta Apr 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not entirely sure how the gcp logging treats fields that are set to None, my guess is that it omits that field. IMO it's better to have this default value meaning that the information is missing/not applicable than omitting it, which could mean that something went wrong during logging.
( I also used this piece of code as reference: https://github.com/google/clusterfuzz/blob/master/src/clusterfuzz/_internal/metrics/logs.py#L420 )

job=log_contexts.meta.get('job_type', 'null'),
fuzz_target=log_contexts.meta.get('fuzz_target', 'null'))
except:
error(
'Error retrieving context for fuzzing-based logs.',
ignore_context=True)
return GenericLogStruct()

if self == LogContextType.TESTCASE:
try:
testcase: 'Testcase | None' = log_contexts.meta.get('testcase')
Expand All @@ -759,19 +775,8 @@ def get_extras(self) -> NamedTuple:
ignore_context=True)
return GenericLogStruct()

fuzz_target: 'FuzzTarget | None' = log_contexts.meta.get('fuzz_target')
if fuzz_target and fuzz_target.binary:
fuzz_target_bin = fuzz_target.binary
else:
fuzz_target_bin = testcase.get_metadata('fuzzer_binary_name',
'unknown')
return TestcaseLogStruct(testcase_id=testcase.key.id()) # type: ignore

return TestcaseLogStruct(
testcase_id=testcase.key.id(), # type: ignore
fuzz_target=fuzz_target_bin, # type: ignore
job=testcase.job_type, # type: ignore
fuzzer=testcase.fuzzer_name # type: ignore
)
except:
error(
'Error retrieving context for testcase-based logs.',
Expand Down Expand Up @@ -800,7 +805,7 @@ def __call__(cls, *args, **kwargs):


class LogContexts(metaclass=Singleton):
"""Class to keep the log contexts and metadata"""
"""Class to keep the log contexts and metadata."""

def __init__(self):
self.contexts: list[LogContextType] = [LogContextType.COMMON]
Expand Down Expand Up @@ -844,7 +849,7 @@ def wrap_log_context(contexts: list[LogContextType]):

@contextlib.contextmanager
def task_stage_context(stage: Stage):
"""Creates a task context for a given stage"""
"""Creates a task context for a given stage."""
with wrap_log_context(contexts=[LogContextType.TASK]):
try:
log_contexts.add_metadata('stage', stage)
Expand All @@ -859,6 +864,29 @@ def task_stage_context(stage: Stage):
log_contexts.delete_metadata('stage')


@contextlib.contextmanager
def fuzz_log_context(fuzzer_name: str, job_type: str,
fuzz_target: 'FuzzTarget | None'):
"""Creates a fuzz context for a given fuzzer, job, and target (optional)."""
with wrap_log_context(contexts=[LogContextType.FUZZ]):
try:
if fuzz_target and fuzz_target.binary:
fuzz_target_bin = fuzz_target.binary
else:
fuzz_target_bin = 'unknown'
log_contexts.add_metadata('fuzz_target', fuzz_target_bin)
log_contexts.add_metadata('fuzzer_name', fuzzer_name)
log_contexts.add_metadata('job_type', job_type)
yield
except Exception as e:
warning(message='Error during fuzz context.')
raise e
finally:
log_contexts.delete_metadata('fuzz_target')
log_contexts.delete_metadata('fuzzer_name')
log_contexts.delete_metadata('job_type')


@contextlib.contextmanager
def testcase_log_context(testcase: 'Testcase',
fuzz_target: 'FuzzTarget | None'):
Expand All @@ -868,10 +896,19 @@ def testcase_log_context(testcase: 'Testcase',
the task's stage. In trusted part, it can be retrieved by querying the DB,
while in untrusted part is only accessible through the protobuf.
"""
with wrap_log_context(contexts=[LogContextType.TESTCASE]):
with wrap_log_context(
contexts=[LogContextType.FUZZ, LogContextType.TESTCASE]):
try:
log_contexts.add_metadata('testcase', testcase)
log_contexts.add_metadata('fuzz_target', fuzz_target)
log_contexts.add_metadata('testcase', testcase) # type: ignore
if testcase:
log_contexts.add_metadata('fuzzer_name', testcase.fuzzer_name)
log_contexts.add_metadata('job_type', testcase.job_type)
if fuzz_target and fuzz_target.binary:
fuzz_target_bin = fuzz_target.binary
else:
fuzz_target_bin = testcase.get_metadata('fuzzer_binary_name',
'unknown')
log_contexts.add_metadata('fuzz_target', fuzz_target_bin)
yield
except Exception as e:
# Logging as a warning because this error will be handled
Expand All @@ -881,4 +918,6 @@ def testcase_log_context(testcase: 'Testcase',
raise e
finally:
log_contexts.delete_metadata('testcase')
log_contexts.delete_metadata('fuzzer_name')
log_contexts.delete_metadata('job_type')
log_contexts.delete_metadata('fuzz_target')
77 changes: 64 additions & 13 deletions src/clusterfuzz/_internal/tests/core/metrics/logs_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,7 +528,7 @@ def test_error(self):
})

def test_common_context_logs(self):
"""Test logs common context is instanced once for distinct modules."""
"""Test that logs common context is instanced once for distinct modules."""
logger = mock.MagicMock()
self.mock.get_logger.return_value = logger

Expand Down Expand Up @@ -590,23 +590,25 @@ def test_testcase_log_context(self):
fuzz_target = data_types.FuzzTarget(
id='libFuzzer_abc', engine='libFuzzer', binary='abc')
fuzz_target.put()
self.mock.get_fuzz_target.return_value = fuzz_target
testcase = data_types.Testcase(
fuzzer_name="test_fuzzer", job_type='test_job')
testcase.put()

with logs.testcase_log_context(testcase, fuzz_target):
self.assertEqual(
logs.log_contexts.contexts,
[logs.LogContextType.COMMON, logs.LogContextType.TESTCASE])
self.assertEqual(logs.log_contexts.contexts, [
logs.LogContextType.COMMON, logs.LogContextType.FUZZ,
logs.LogContextType.TESTCASE
])
statement_line = inspect.currentframe().f_lineno + 1
logs.emit(logging.ERROR, 'msg', exc_info='ex', target='bot', test='yes')
# Assert metadata after emit to ensure that `common_ctx` has been added.
self.assertEqual(
logs.log_contexts.meta, {
'common_ctx': self.common_context,
'testcase': testcase,
'fuzz_target': fuzz_target
'fuzz_target': fuzz_target.binary,
'fuzzer_name': testcase.fuzzer_name,
'job_type': testcase.job_type
})

logs_extra = {'target': 'bot', 'test': 'yes'}
Expand Down Expand Up @@ -739,13 +741,13 @@ def test_log_ignore_context(self):
},
})

def test_missing_fuzz_target_in_log_context(self):
def test_missing_fuzz_target_in_testcase_context(self):
"""Test the testcase-based log context when the fuzz target is missing."""
from clusterfuzz._internal.datastore import data_types
logger = mock.MagicMock()
self.mock.get_logger.return_value = logger
testcase = data_types.Testcase(
fuzzer_name="test_fuzzer", job_type='test_job')
fuzzer_name='test_fuzzer', job_type='test_job')
# Set this metadata to be used instead of the fuzz_target entity.
testcase.set_metadata('fuzzer_binary_name', 'fuzz_abc')
testcase.put()
Expand All @@ -760,18 +762,67 @@ def test_missing_fuzz_target_in_log_context(self):
logs_extra.update(self.common_context)

with logs.testcase_log_context(testcase, None):
self.assertEqual(
logs.log_contexts.contexts,
[logs.LogContextType.COMMON, logs.LogContextType.TESTCASE])
self.assertEqual(logs.log_contexts.contexts, [
logs.LogContextType.COMMON, logs.LogContextType.FUZZ,
logs.LogContextType.TESTCASE
])
statement_line = inspect.currentframe().f_lineno + 1
logs.emit(logging.ERROR, 'msg', exc_info='ex', target='bot', test='yes')
self.assertEqual(
logs.log_contexts.meta, {
'common_ctx': self.common_context,
'testcase': testcase,
'fuzz_target': None
'fuzz_target': testcase.get_metadata('fuzzer_binary_name'),
'fuzzer_name': testcase.fuzzer_name,
'job_type': testcase.job_type
})

logger.log.assert_called_with(
logging.ERROR,
'msg',
exc_info='ex',
extra={
'extras': logs_extra,
'location': {
'path': os.path.abspath(__file__).rstrip('c'),
'line': statement_line,
'method': 'test_missing_fuzz_target_in_testcase_context'
},
})

def test_fuzz_logs_context(self):
"""Test the correct logger call for the fuzz-based log context."""
from clusterfuzz._internal.datastore import data_types
logger = mock.MagicMock()
self.mock.get_logger.return_value = logger

fuzz_target = data_types.FuzzTarget(
id='libFuzzer_abc', engine='libFuzzer', binary='abc')
fuzz_target.put()
fuzzer_name = 'test_fuzzer'
job_type = 'test_job'

with logs.fuzz_log_context(fuzzer_name, job_type, fuzz_target):
self.assertEqual(logs.log_contexts.contexts,
[logs.LogContextType.COMMON, logs.LogContextType.FUZZ])
statement_line = inspect.currentframe().f_lineno + 1
logs.emit(logging.ERROR, 'msg', exc_info='ex', target='bot', test='yes')
# Assert metadata after emit to ensure that `common_ctx` has been added.
self.assertEqual(
logs.log_contexts.meta, {
'common_ctx': self.common_context,
'fuzz_target': fuzz_target.binary,
'fuzzer_name': fuzzer_name,
'job_type': job_type
})

logs_extra = {'target': 'bot', 'test': 'yes'}
logs_extra.update(self.common_context)
logs_extra.update({
'fuzz_target': 'abc',
'job': 'test_job',
'fuzzer': 'test_fuzzer'
})
logger.log.assert_called_with(
logging.ERROR,
'msg',
Expand All @@ -781,7 +832,7 @@ def test_missing_fuzz_target_in_log_context(self):
'location': {
'path': os.path.abspath(__file__).rstrip('c'),
'line': statement_line,
'method': 'test_missing_fuzz_target_in_log_context'
'method': 'test_fuzz_logs_context'
},
})

Expand Down
Loading