-
Notifications
You must be signed in to change notification settings - Fork 103
Add lambda to fetch data from api and add configuration data model #7092
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
21 commits
Select commit
Hold shift + click to select a range
45e083e
Update
yangw-dev 1ae56d8
Update (base update)
yangw-dev f960529
Update
yangw-dev 94fe87b
Update (base update)
yangw-dev f645759
Update
yangw-dev 8e6c57a
Update (base update)
yangw-dev 8c18648
Update
yangw-dev b1c7b74
Update
yangw-dev 2674d77
Update
yangw-dev 31e1daf
Update (base update)
yangw-dev a4a8500
Update
yangw-dev 8bb1a73
Update
yangw-dev 085292f
Update (base update)
yangw-dev 5ec4c9b
Update
yangw-dev ea3d214
Update
yangw-dev 5f7db34
Update (base update)
yangw-dev 1c65625
Update
yangw-dev fddbe90
Update (base update)
yangw-dev 539e4e0
Update
yangw-dev 838363c
Update (base update)
yangw-dev 3e7042e
Update
yangw-dev File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
64 changes: 64 additions & 0 deletions
64
aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
from dataclasses import dataclass, field | ||
from typing import Any, Dict, List | ||
|
||
import requests | ||
|
||
|
||
# The data class to provide api response model from get_time_series api | ||
|
||
|
||
@dataclass
class TimeRange:
    """Start/end pair taken from the ``time_range`` section of the API payload.

    Both bounds are kept as the strings the API returned; no parsing or
    validation is performed here.
    """

    # Lower bound of the range, as a string.
    start: str
    # Upper bound of the range, as a string.
    end: str
|
||
|
||
@dataclass
class BenchmarkTimeSeriesItem:
    """One entry of the ``time_series`` list in the API payload.

    Instances are built by unpacking each payload item as keyword arguments,
    so the field names here must match the keys of those items.
    """

    # Free-form mapping describing the group this series belongs to.
    group_info: Dict[str, Any]
    # Count reported by the API (presumably the number of data points in
    # ``data`` -- TODO confirm against the API contract).
    num_of_dp: int
    # The series' records; each instance gets its own fresh empty list
    # when the field is omitted.
    data: List[Dict[str, Any]] = field(default_factory=list)
|
||
|
||
@dataclass
class BenchmarkTimeSeriesApiData:
    """Contents of the ``data`` section of the API payload."""

    # Parsed entries of the payload's ``time_series`` list.
    time_series: List[BenchmarkTimeSeriesItem]
    # Parsed ``time_range`` section of the payload.
    time_range: TimeRange
|
||
|
||
@dataclass
class BenchmarkTimeSeriesApiResponse:
    """Typed view of a successful response from the time-series API."""

    # Parsed ``data`` section of the response body.
    data: BenchmarkTimeSeriesApiData

    @classmethod
    def from_request(
        cls, url: str, query: dict, timeout: int = 180
    ) -> "BenchmarkTimeSeriesApiResponse":
        """
        Send a POST request and parse into BenchmarkTimeSeriesApiResponse.

        Args:
            url: API endpoint
            query: payload sent as the JSON body of the request
            timeout: seconds passed to ``requests.post`` as its ``timeout``
                (default: 180)
        Returns:
            BenchmarkTimeSeriesApiResponse
        Raises:
            requests.exceptions.RequestException if network/timeout/HTTP error
            RuntimeError if the API returns an "error" field or malformed data
        """
        resp = requests.post(url, json=query, timeout=timeout)
        resp.raise_for_status()
        payload = resp.json()

        # A body such as `null`, a bare string or a number would otherwise
        # raise TypeError (or be misread by the "error" membership test)
        # instead of the documented RuntimeError.
        if not isinstance(payload, dict):
            raise RuntimeError(
                "Malformed API payload: expected a JSON object, "
                f"got {type(payload).__name__}"
            )
        if "error" in payload:
            raise RuntimeError(f"API error: {payload['error']}")
        try:
            data = payload["data"]
            tr = TimeRange(**data["time_range"])
            ts = [BenchmarkTimeSeriesItem(**item) for item in data["time_series"]]
        except Exception as e:
            # Deliberately broad: any failure while unpacking the payload
            # (missing key, wrong type, unexpected field) is reported as a
            # malformed payload; the original exception is kept as the cause.
            raise RuntimeError(f"Malformed API payload: {e}") from e
        return cls(data=BenchmarkTimeSeriesApiData(time_series=ts, time_range=tr))
94 changes: 94 additions & 0 deletions
94
aws/lambda/benchmark_regression_summary_report/common/config.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
from common.config_model import ( | ||
BenchmarkApiSource, | ||
BenchmarkConfig, | ||
BenchmarkRegressionConfigBook, | ||
DayRangeWindow, | ||
Frequency, | ||
Policy, | ||
RangeConfig, | ||
RegressionPolicy, | ||
) | ||
|
||
|
||
# Compiler benchmark regression config | ||
# todo(elainewy): eventually each team should configure | ||
# their own benchmark regression config; currently placed | ||
# here for the lambda | ||
|
||
|
||
# Regression-detection settings for the compiler benchmark: where to fetch
# the time series from, which windows to compare, the per-metric policies,
# and where to send notifications.
COMPILER_BENCHMARK_CONFIG = BenchmarkConfig(
    name="Compiler Benchmark Regression",
    id="compiler_regression",
    source=BenchmarkApiSource(
        api_query_url="https://hud.pytorch.org/api/benchmark/get_time_series",
        type="benchmark_time_series_api",
        # currently we only detect the regression for h100 with dtype bfloat16, and mode inference
        # we can extend this to other devices, dtypes and mode in the future
        # The "{{ startTime }}" / "{{ stopTime }}" placeholders are presumably
        # substituted before the request is sent -- TODO confirm where the
        # template is rendered.
        api_endpoint_params_template="""
        {
            "name": "compiler_precompute",
            "query_params": {
                "commits": [],
                "compilers": [],
                "arch": "h100",
                "device": "cuda",
                "dtype": "bfloat16",
                "granularity": "hour",
                "mode": "inference",
                "startTime": "{{ startTime }}",
                "stopTime": "{{ stopTime }}",
                "suites": ["torchbench", "huggingface", "timm_models"],
                "workflowId": 0,
                "branches": ["main"]
            }
        }
        """,
    ),
    # Baseline window is DayRangeWindow(value=7) and comparison window is
    # DayRangeWindow(value=2); every metric below uses
    # baseline_aggregation="max".
    # NOTE(review): this comment previously described an "avg" baseline
    # compared with "the last 1 day" -- confirm which is intended.
    policy=Policy(
        frequency=Frequency(value=1, unit="days"),
        range=RangeConfig(
            baseline=DayRangeWindow(value=7),
            comparison=DayRangeWindow(value=2),
        ),
        # One RegressionPolicy per metric; each dict key repeats the policy's
        # own ``name``.
        metrics={
            "passrate": RegressionPolicy(
                name="passrate",
                condition="greater_equal",
                threshold=0.9,
                baseline_aggregation="max",
            ),
            "geomean": RegressionPolicy(
                name="geomean",
                condition="greater_equal",
                threshold=0.95,
                baseline_aggregation="max",
            ),
            "compression_ratio": RegressionPolicy(
                name="compression_ratio",
                condition="greater_equal",
                threshold=0.9,
                baseline_aggregation="max",
            ),
        },
        # GitHub notification target: issue 7081 in pytorch/test-infra.
        notification_config={
            "type": "github",
            "repo": "pytorch/test-infra",
            "issue": "7081",
        },
    ),
)
|
||
# Book of all known benchmark regression configs, keyed by config id.
BENCHMARK_REGRESSION_CONFIG = BenchmarkRegressionConfigBook(
    configs=dict(compiler_regression=COMPILER_BENCHMARK_CONFIG),
)
|
||
|
||
def get_benchmark_regression_config(config_id: str) -> BenchmarkConfig:
    """Get benchmark regression config by config id.

    Args:
        config_id: key to look up in BENCHMARK_REGRESSION_CONFIG.
    Returns:
        The value BENCHMARK_REGRESSION_CONFIG yields for ``config_id``.
    Raises:
        ValueError: if the lookup raises KeyError; the KeyError is kept as
            the explicit cause.
    """
    try:
        return BENCHMARK_REGRESSION_CONFIG[config_id]
    except KeyError as err:
        raise ValueError(f"Invalid config id: {config_id}") from err
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.