diff --git a/.github/workflows/update_test_file_ratings.yml b/.github/workflows/update_test_file_ratings.yml index e3321663bb..c4b01f19dc 100644 --- a/.github/workflows/update_test_file_ratings.yml +++ b/.github/workflows/update_test_file_ratings.yml @@ -104,3 +104,26 @@ jobs: user_email: "test-infra@pytorch.org" user_name: "Pytorch Test Infra" commit_message: "Updating TD heuristic: historical edited files" + + update-ec2-pricing: + runs-on: linux.large + steps: + - name: Checkout test-infra repository + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - name: Install Dependencies + run: python3 -m pip install boto3==1.19.12 PyYAML==6.0 + + - name: Generate EC2 pricing data + run: | + python3 tools/torchci/ec2_pricing.py --output ec2_pricing.json + + - name: Compress pricing file + run: | + gzip ec2_pricing.json + + - name: Upload pricing file to S3 + run: | + aws s3 cp ec2_pricing.json.gz s3://ossci-metrics/ec2_pricing.json.gz \ + --content-encoding gzip \ + --content-type application/json diff --git a/tools/torchci/ec2_pricing.py b/tools/torchci/ec2_pricing.py new file mode 100644 index 0000000000..344e0cf264 --- /dev/null +++ b/tools/torchci/ec2_pricing.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 +""" +EC2 Pricing Map Generator + +Generates a pricing map for EC2 instances by reading .github/scale-config.yml +and fetching current AWS pricing data. +""" + +import argparse +import json +from functools import lru_cache +from typing import Optional + +import requests +import yaml + + +def gen_pricing_map(output_file: str) -> None: + """Generate pricing map from scale-config.yml and write to output file.""" + with open(".github/scale-config.yml", "r") as f: + config = yaml.safe_load(f) + + runner_types = config.get("runner_types", {}) + pricing_data = [] + + for runner_type, runner_config in runner_types.items(): + instance_type = runner_config.get("instance_type", "") + os_type = runner_config.get("os", "linux") # Default to linux if not specified + price = get_price(instance_type, os_type) or 0.0 + pricing_data.append([runner_type, instance_type, price]) + + # Write to file + with open(output_file, "w") as f: + for row in pricing_data: + f.write(json.dumps(row)) + f.write("\n") + + print(f"Output written to {output_file}") + + +@lru_cache +def get_all_pricing_data() -> dict: + """Fetch the entire EC2 pricing data from AWS pricing API. Cached for efficiency.""" + price_list_url = "https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonEC2/current/us-east-1/index.json" + response = requests.get(price_list_url) + response.raise_for_status() + return response.json() + + +def get_price(instance_type, os_type="linux") -> Optional[float]: + """Fetch on-demand price for EC2 instance type using AWS public pricing data. Returns None if not found.""" + + # Map os_type to AWS pricing API values + operating_system = "Windows" if os_type.lower() == "windows" else "Linux" + + # Get the cached pricing data + pricing_data = get_all_pricing_data() + + # Search through the products to find matching instance + for product_sku, product_data in pricing_data.get("products", {}).items(): + attributes = product_data.get("attributes", {}) + + if ( + attributes.get("instanceType") == instance_type + and attributes.get("location") == "US East (N. Virginia)" + and attributes.get("operatingSystem") == operating_system + and attributes.get("preInstalledSw") == "NA" + and attributes.get("tenancy") == "Shared" + and attributes.get("usagetype", "").startswith("BoxUsage") + ): + # Found the product, now get the pricing terms + terms = ( + pricing_data.get("terms", {}).get("OnDemand", {}).get(product_sku, {}) + ) + + for term_data in terms.values(): + price_dimensions = term_data.get("priceDimensions", {}) + for price_data in price_dimensions.values(): + price_per_unit = price_data.get("pricePerUnit", {}).get("USD") + if price_per_unit: + return float(price_per_unit) + + print(f"No pricing found for {instance_type} ({operating_system})") + return None + + +def main(): + """Parse command-line arguments and generate EC2 pricing map.""" + parser = argparse.ArgumentParser( + description="Generate EC2 pricing map from scale-config.yml" + ) + parser.add_argument( + "--output", + "-o", + type=str, + default="ec2_pricing.json", + help="Output file path (default: ec2_pricing.json)", + ) + args = parser.parse_args() + + gen_pricing_map(args.output) + + +if __name__ == "__main__": + main()