Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from dataclasses import dataclass, field
from typing import Any, Dict, List

import requests


# Data classes modeling the response of the get_time_series API.


@dataclass
class TimeRange:
    """Time window covered by a benchmark time-series API response."""

    # NOTE(review): presumably ISO-8601 timestamp strings — confirm against the API.
    start: str
    end: str


@dataclass
class BenchmarkTimeSeriesItem:
    """One time series returned by the get_time_series API."""

    # Key/value attributes identifying the group this series belongs to
    # (e.g. device/dtype/compiler — schema comes from the API, not fixed here).
    group_info: Dict[str, Any]
    # Number of data points reported for this series.
    num_of_dp: int
    # The raw data points; defaults to an empty list when absent.
    data: List[Dict[str, Any]] = field(default_factory=list)


@dataclass
class BenchmarkTimeSeriesApiData:
    """Payload ("data" field) of a successful get_time_series API response."""

    time_series: List[BenchmarkTimeSeriesItem]
    time_range: TimeRange


@dataclass
class BenchmarkTimeSeriesApiResponse:
    """Top-level envelope of a get_time_series API response."""

    data: BenchmarkTimeSeriesApiData

    @classmethod
    def from_request(
        cls, url: str, query: dict, timeout: int = 180
    ) -> "BenchmarkTimeSeriesApiResponse":
        """
        Send a POST request and parse the reply into a BenchmarkTimeSeriesApiResponse.

        Args:
            url: API endpoint
            query: JSON payload sent as the request body
            timeout: max seconds to wait for connect + response (default: 180)
        Returns:
            BenchmarkTimeSeriesApiResponse
        Raises:
            requests.exceptions.RequestException: on network/timeout/HTTP error
            RuntimeError: if the API returns an "error" field or malformed data
        """
        resp = requests.post(url, json=query, timeout=timeout)
        resp.raise_for_status()
        payload = resp.json()

        # The API signals application-level failure via an "error" field even
        # on an HTTP 200, so check it before trying to parse the payload.
        if "error" in payload:
            raise RuntimeError(f"API error: {payload['error']}")
        try:
            tr = TimeRange(**payload["data"]["time_range"])
            ts = [
                BenchmarkTimeSeriesItem(**item)
                for item in payload["data"]["time_series"]
            ]
        except Exception as e:
            # Chain the original exception so the root cause stays visible.
            raise RuntimeError(f"Malformed API payload: {e}") from e
        return cls(data=BenchmarkTimeSeriesApiData(time_series=ts, time_range=tr))
94 changes: 94 additions & 0 deletions aws/lambda/benchmark_regression_summary_report/common/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
from common.config_model import (
BenchmarkApiSource,
BenchmarkConfig,
BenchmarkRegressionConfigBook,
DayRangeWindow,
Frequency,
Policy,
RangeConfig,
RegressionPolicy,
)


# Compiler benchmark regression config
# todo(elainewy): eventually each team should configure
# their own benchmark regression config, currenlty place
# here for lambda


COMPILER_BENCHMARK_CONFIG = BenchmarkConfig(
name="Compiler Benchmark Regression",
id="compiler_regression",
source=BenchmarkApiSource(
api_query_url="https://hud.pytorch.org/api/benchmark/get_time_series",
type="benchmark_time_series_api",
# currently we only detect the regression for h100 with dtype bfloat16, and mode inference
# we can extend this to other devices, dtypes and mode in the future
api_endpoint_params_template="""
{
"name": "compiler_precompute",
"query_params": {
"commits": [],
"compilers": [],
"arch": "h100",
"device": "cuda",
"dtype": "bfloat16",
"granularity": "hour",
"mode": "inference",
"startTime": "{{ startTime }}",
"stopTime": "{{ stopTime }}",
"suites": ["torchbench", "huggingface", "timm_models"],
"workflowId": 0,
"branches": ["main"]
}
}
""",
),
# set baseline from past 7 days using avg, and compare with the last 1 day
policy=Policy(
frequency=Frequency(value=1, unit="days"),
range=RangeConfig(
baseline=DayRangeWindow(value=7),
comparison=DayRangeWindow(value=2),
),
metrics={
"passrate": RegressionPolicy(
name="passrate",
condition="greater_equal",
threshold=0.9,
baseline_aggregation="max",
),
"geomean": RegressionPolicy(
name="geomean",
condition="greater_equal",
threshold=0.95,
baseline_aggregation="max",
),
"compression_ratio": RegressionPolicy(
name="compression_ratio",
condition="greater_equal",
threshold=0.9,
baseline_aggregation="max",
),
},
notification_config={
"type": "github",
"repo": "pytorch/test-infra",
"issue": "7081",
},
),
)

# Registry of all known benchmark regression configs, keyed by config id.
BENCHMARK_REGRESSION_CONFIG = BenchmarkRegressionConfigBook(
    configs={
        "compiler_regression": COMPILER_BENCHMARK_CONFIG,
    }
)


def get_benchmark_regression_config(config_id: str) -> BenchmarkConfig:
    """Look up a benchmark regression config by its id.

    Args:
        config_id: id of the config, e.g. "compiler_regression".
    Returns:
        The matching BenchmarkConfig from BENCHMARK_REGRESSION_CONFIG.
    Raises:
        ValueError: if no config with the given id exists.
    """
    try:
        return BENCHMARK_REGRESSION_CONFIG[config_id]
    except KeyError as e:
        # Chain the KeyError so the original lookup failure stays visible.
        raise ValueError(f"Invalid config id: {config_id}") from e
Loading