Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from dataclasses import dataclass, field
from typing import Any, Dict, List

import requests


# Data classes modeling the response of the get_time_series API.


@dataclass
class TimeRange:
    """Time window covered by a benchmark time-series API response."""

    # NOTE(review): presumably ISO-8601 timestamp strings — confirm against the API.
    start: str
    end: str


@dataclass
class BenchmarkTimeSeriesItem:
    """One time series returned by the get_time_series API."""

    # Key/value attributes identifying the group this series belongs to
    # (e.g. device/dtype/compiler — schema comes from the API, not fixed here).
    group_info: Dict[str, Any]
    # Number of data points reported for this series.
    num_of_dp: int
    # The raw data points; defaults to an empty list when absent.
    data: List[Dict[str, Any]] = field(default_factory=list)


@dataclass
class BenchmarkTimeSeriesApiData:
    """Payload ("data" field) of a successful get_time_series API response."""

    time_series: List[BenchmarkTimeSeriesItem]
    time_range: TimeRange


@dataclass
class BenchmarkTimeSeriesApiResponse:
    """Top-level envelope of a get_time_series API response."""

    data: BenchmarkTimeSeriesApiData

    @classmethod
    def from_request(
        cls, url: str, query: dict, timeout: int = 180
    ) -> "BenchmarkTimeSeriesApiResponse":
        """
        Send a POST request and parse the reply into a BenchmarkTimeSeriesApiResponse.

        Args:
            url: API endpoint
            query: JSON payload sent as the request body
            timeout: max seconds to wait for connect + response (default: 180)
        Returns:
            BenchmarkTimeSeriesApiResponse
        Raises:
            requests.exceptions.RequestException: on network/timeout/HTTP error
            RuntimeError: if the API returns an "error" field or malformed data
        """
        resp = requests.post(url, json=query, timeout=timeout)
        resp.raise_for_status()
        payload = resp.json()

        # The API signals application-level failure via an "error" field even
        # on an HTTP 200, so check it before trying to parse the payload.
        if "error" in payload:
            raise RuntimeError(f"API error: {payload['error']}")
        try:
            tr = TimeRange(**payload["data"]["time_range"])
            ts = [
                BenchmarkTimeSeriesItem(**item)
                for item in payload["data"]["time_series"]
            ]
        except Exception as e:
            # Chain the original exception so the root cause stays visible.
            raise RuntimeError(f"Malformed API payload: {e}") from e
        return cls(data=BenchmarkTimeSeriesApiData(time_series=ts, time_range=tr))
94 changes: 94 additions & 0 deletions aws/lambda/benchmark_regression_summary_report/common/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
from common.config_model import (
BenchmarkApiSource,
BenchmarkConfig,
BenchmarkRegressionConfigBook,
DayRangeWindow,
Frequency,
Policy,
RangeConfig,
RegressionPolicy,
)


# Compiler benchmark regression config
# todo(elainewy): eventually each team should configure
# their own benchmark regression config, currenlty place
# here for lambda


COMPILER_BENCHMARK_CONFIG = BenchmarkConfig(
name="Compiler Benchmark Regression",
id="compiler_regression",
source=BenchmarkApiSource(
api_query_url="https://hud.pytorch.org/api/benchmark/get_time_series",
type="benchmark_time_series_api",
# currently we only detect the regression for h100 with dtype bfloat16, and mode inference
# we can extend this to other devices, dtypes and mode in the future
api_endpoint_params_template="""
{
"name": "compiler_precompute",
"query_params": {
"commits": [],
"compilers": [],
"arch": "h100",
"device": "cuda",
"dtype": "bfloat16",
"granularity": "hour",
"mode": "inference",
"startTime": "{{ startTime }}",
"stopTime": "{{ stopTime }}",
"suites": ["torchbench", "huggingface", "timm_models"],
"workflowId": 0,
"branches": ["main"]
}
}
""",
),
# set baseline from past 7 days using avg, and compare with the last 1 day
policy=Policy(
frequency=Frequency(value=1, unit="days"),
range=RangeConfig(
baseline=DayRangeWindow(value=7),
comparison=DayRangeWindow(value=2),
),
metrics={
"passrate": RegressionPolicy(
name="passrate",
condition="greater_equal",
threshold=0.9,
baseline_aggregation="max",
),
"geomean": RegressionPolicy(
name="geomean",
condition="greater_equal",
threshold=0.95,
baseline_aggregation="max",
),
"compression_ratio": RegressionPolicy(
name="compression_ratio",
condition="greater_equal",
threshold=0.9,
baseline_aggregation="max",
),
},
notification_config={
"type": "github",
"repo": "pytorch/test-infra",
"issue": "7081",
},
),
)

# Registry of all known benchmark regression configs, keyed by config id.
BENCHMARK_REGRESSION_CONFIG = BenchmarkRegressionConfigBook(
    configs={
        "compiler_regression": COMPILER_BENCHMARK_CONFIG,
    }
)


def get_benchmark_regression_config(config_id: str) -> BenchmarkConfig:
    """Look up a benchmark regression config by its id.

    Args:
        config_id: id of the config, e.g. "compiler_regression".
    Returns:
        The matching BenchmarkConfig from BENCHMARK_REGRESSION_CONFIG.
    Raises:
        ValueError: if no config with the given id exists.
    """
    try:
        return BENCHMARK_REGRESSION_CONFIG[config_id]
    except KeyError as e:
        # Chain the KeyError so the original lookup failure stays visible.
        raise ValueError(f"Invalid config id: {config_id}") from e
Loading