Skip to content

Historical Data Management OSS-Fuzz SDK Implementation #1150

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
36 changes: 33 additions & 3 deletions ossfuzz_py/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,25 +21,55 @@

from .core.benchmark_manager import Benchmark, BenchmarkManager
# Data models and enums
from .core.data_models import (CrashData, FuzzingEngine, ProjectConfig,
Sanitizer, Severity)
from .core.data_models import (BuildHistoryData, CorpusHistoryData,
CoverageHistoryData, CrashData, CrashHistoryData,
FuzzingEngine, HistoricalSummary, ProjectConfig,
Sanitizer, Severity, TimeSeriesData)
# Core SDK - Main SDK class and modules
from .core.ossfuzz_manager import OSSFuzzManager
from .core.ossfuzz_sdk import OSSFuzzSDK
from .data.storage_adapter import (FileStorageAdapter, GCSStorageAdapter,
StorageAdapter)
# Storage components
from .data.storage_manager import StorageManager
# Error handling
from .errors import *
# History managers
from .history import (BuildHistoryManager, CorpusHistoryManager,
CoverageHistoryManager, CrashHistoryManager,
HistoryManager)

# Public API - All exports available to SDK clients
__all__ = [
# Core SDK - Main classes according to UML diagram
'OSSFuzzManager',
'OSSFuzzSDK',
'BenchmarkManager',
'Benchmark',

# History managers
'HistoryManager',
'BuildHistoryManager',
'CrashHistoryManager',
'CorpusHistoryManager',
'CoverageHistoryManager',

# Storage components
'StorageManager',
'StorageAdapter',
'FileStorageAdapter',
'GCSStorageAdapter',

# Data models and enums
'Severity',
'Sanitizer',
'Sanitizer',
'FuzzingEngine',
'BuildHistoryData',
'CrashHistoryData',
'CorpusHistoryData',
'CoverageHistoryData',
'TimeSeriesData',
'HistoricalSummary',

# Core error types and enums
'ErrorCode',
Expand Down
128 changes: 127 additions & 1 deletion ossfuzz_py/core/data_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from datetime import datetime
from enum import Enum
from pathlib import Path
from typing import Dict, List, Optional
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field

Expand Down Expand Up @@ -133,3 +133,129 @@ def to_yaml(self, path: Path) -> bool:
return True
except Exception:
return False


class BuildHistoryData(BaseDataModel):
  """Represents a single build history entry."""

  # Required fields (declared with `...`): a record cannot be created
  # without them.
  build_id: str = Field(
      ...,
      description="Unique identifier for the build",
  )
  timestamp: datetime = Field(
      ...,
      description="Build timestamp",
  )
  project_name: str = Field(
      ...,
      description="Name of the project",
  )
  success: bool = Field(
      ...,
      description="Whether the build was successful",
  )

  # Optional details: each one defaults to None when not supplied.
  duration_seconds: Optional[int] = Field(
      default=None,
      description="Build duration in seconds",
  )
  commit_hash: Optional[str] = Field(
      default=None,
      description="Git commit hash",
  )
  branch: Optional[str] = Field(
      default=None,
      description="Git branch",
  )
  sanitizer: Optional[Sanitizer] = Field(
      default=None,
      description="Sanitizer used",
  )
  architecture: Optional[str] = Field(
      default=None,
      description="Target architecture",
  )
  error_message: Optional[str] = Field(
      default=None,
      description="Error message if build failed",
  )
  artifacts: Optional[List[str]] = Field(
      default=None,
      description="List of build artifacts",
  )


class CrashHistoryData(BaseDataModel):
  """Represents a single crash history entry."""

  # Required fields (declared with `...`).
  crash_id: str = Field(
      ...,
      description="Unique identifier for the crash",
  )
  timestamp: datetime = Field(
      ...,
      description="Crash timestamp",
  )
  project_name: str = Field(
      ...,
      description="Name of the project",
  )
  fuzzer_name: str = Field(
      ...,
      description="Name of the fuzzer",
  )
  crash_type: str = Field(
      ...,
      description="Type of crash (e.g., heap-buffer-overflow)",
  )
  crash_signature: str = Field(
      ...,
      description="Crash signature/hash",
  )

  # Severity is the only field with a non-None default: Severity.UNKNOWN.
  severity: Severity = Field(
      default=Severity.UNKNOWN,
      description="Crash severity",
  )

  # Optional details: each one defaults to None when not supplied.
  reproducible: Optional[bool] = Field(
      default=None,
      description="Whether crash is reproducible",
  )
  stack_trace: Optional[str] = Field(
      default=None,
      description="Stack trace",
  )
  testcase_path: Optional[str] = Field(
      default=None,
      description="Path to testcase",
  )
  regression_range: Optional[str] = Field(
      default=None,
      description="Regression range",
  )


class CorpusHistoryData(BaseDataModel):
  """Represents a single corpus history entry."""

  # Required fields (declared with `...`).
  timestamp: datetime = Field(
      ...,
      description="Corpus snapshot timestamp",
  )
  project_name: str = Field(
      ...,
      description="Name of the project",
  )
  fuzzer_name: str = Field(
      ...,
      description="Name of the fuzzer",
  )
  corpus_size: int = Field(
      ...,
      description="Number of files in corpus",
  )
  total_size_bytes: int = Field(
      ...,
      description="Total size of corpus in bytes",
  )

  # Optional metrics: each one defaults to None when not supplied.
  new_files_count: Optional[int] = Field(
      default=None,
      description="Number of new files added",
  )
  coverage_increase: Optional[float] = Field(
      default=None,
      description="Coverage increase percentage",
  )
  unique_features: Optional[int] = Field(
      default=None,
      description="Number of unique features",
  )


class CoverageHistoryData(BaseDataModel):
  """Represents a single coverage history entry."""

  # Field order is kept as originally declared; `...` marks a required
  # field, `default=None` an optional one.
  timestamp: datetime = Field(
      ...,
      description="Coverage measurement timestamp",
  )
  project_name: str = Field(
      ...,
      description="Name of the project",
  )
  fuzzer_name: Optional[str] = Field(
      default=None,
      description="Name of the fuzzer",
  )

  # Coverage percentages: only line coverage is required.
  line_coverage: float = Field(
      ...,
      description="Line coverage percentage",
  )
  function_coverage: Optional[float] = Field(
      default=None,
      description="Function coverage percentage",
  )
  branch_coverage: Optional[float] = Field(
      default=None,
      description="Branch coverage percentage",
  )

  # Optional covered/total counters for lines, functions and branches.
  lines_covered: Optional[int] = Field(
      default=None,
      description="Number of lines covered",
  )
  lines_total: Optional[int] = Field(
      default=None,
      description="Total number of lines",
  )
  functions_covered: Optional[int] = Field(
      default=None,
      description="Number of functions covered",
  )
  functions_total: Optional[int] = Field(
      default=None,
      description="Total number of functions",
  )
  branches_covered: Optional[int] = Field(
      default=None,
      description="Number of branches covered",
  )
  branches_total: Optional[int] = Field(
      default=None,
      description="Total number of branches",
  )


class TimeSeriesData(BaseDataModel):
  """Generic time series data container."""

  # Required fields (declared with `...`).
  project_name: str = Field(
      ...,
      description="Name of the project",
  )
  data_type: str = Field(
      ...,
      description="Type of data (build, crash, corpus, coverage)",
  )
  start_date: datetime = Field(
      ...,
      description="Start date of the time series",
  )
  end_date: datetime = Field(
      ...,
      description="End date of the time series",
  )
  # Each data point is a free-form mapping; no per-point schema is
  # declared here.
  data_points: List[Dict[str, Any]] = Field(
      ...,
      description="Time series data points",
  )

  # Optional free-form metadata; defaults to None.
  metadata: Optional[Dict[str, Any]] = Field(
      default=None,
      description="Additional metadata",
  )


class HistoricalSummary(BaseDataModel):
  """Summary statistics for historical data."""

  # Required: the project and the period the summary covers.
  project_name: str = Field(
      ...,
      description="Name of the project",
  )
  period_start: datetime = Field(
      ...,
      description="Start of the summary period",
  )
  period_end: datetime = Field(
      ...,
      description="End of the summary period",
  )

  # Every statistic below is optional and defaults to None.

  # Build statistics
  total_builds: Optional[int] = Field(
      default=None,
      description="Total number of builds",
  )
  successful_builds: Optional[int] = Field(
      default=None,
      description="Number of successful builds",
  )
  build_success_rate: Optional[float] = Field(
      default=None,
      description="Build success rate percentage",
  )

  # Crash statistics
  total_crashes: Optional[int] = Field(
      default=None,
      description="Total number of crashes",
  )
  unique_crashes: Optional[int] = Field(
      default=None,
      description="Number of unique crashes",
  )
  critical_crashes: Optional[int] = Field(
      default=None,
      description="Number of critical crashes",
  )

  # Coverage statistics
  max_coverage: Optional[float] = Field(
      default=None,
      description="Maximum coverage achieved",
  )
  avg_coverage: Optional[float] = Field(
      default=None,
      description="Average coverage",
  )
  coverage_trend: Optional[str] = Field(
      default=None,
      description="Coverage trend (increasing/decreasing/stable)",
  )

  # Corpus statistics
  max_corpus_size: Optional[int] = Field(
      default=None,
      description="Maximum corpus size",
  )
  avg_corpus_size: Optional[float] = Field(
      default=None,
      description="Average corpus size",
  )
  corpus_growth_rate: Optional[float] = Field(
      default=None,
      description="Corpus growth rate",
  )
Loading