Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions terraform-aws-github-runner/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ module "webhook" {
module "runners" {
source = "./modules/runners"

auth_gh_app = var.auth_gh_app
auth_gh_org = var.auth_gh_org
aws_region = var.aws_region
aws_region_instances = var.aws_region_instances
vpc_ids = var.vpc_ids
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ SHELL=/bin/bash -o pipefail
.PHONY: clean
clean:
rm -rf dist node_modules coverage
rm runners.zip
rm -rf runners.zip
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm surprised this was needed


.PHONY: build
build:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ beforeEach(() => {
jest.restoreAllMocks();
nock.disableNetConnect();

// eslint-disable-next-line @typescript-eslint/no-explicit-any
(mocked(createClient) as any).mockImplementation(produceMockedRedis);

jest.spyOn(Config, 'Instance', 'get').mockImplementation(() => config as unknown as Config);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ import { getBoolean, shuffleArrayInPlace } from './utils';
export class Config {
private static _instance: Config | undefined;

readonly authGHOrg: string | undefined;
readonly authGHRepo: string | undefined;
readonly awsRegion: string;
readonly awsRegionInstances: string[];
readonly awsRegionsToVpcIds: Map<string, Array<string>>;
Expand Down Expand Up @@ -39,8 +41,8 @@ export class Config {
readonly scaleConfigOrg: string;
readonly scaleConfigRepo: string;
readonly scaleConfigRepoPath: string;
readonly scaleUpMaxQueueTimeMinutes: number;
readonly scaleUpChronRecordQueueUrl: string | undefined;
readonly scaleUpMaxQueueTimeMinutes: number;
readonly secretsManagerSecretsId: string | undefined;
readonly sSMParamCleanupAgeDays: number;
readonly sSMParamMaxCleanupAllowance: number;
Expand All @@ -50,6 +52,10 @@ export class Config {
readonly vpcIdToSubnetIds: Map<string, Array<string>>;

protected constructor() {
/* istanbul ignore next */
this.authGHOrg = process.env.AUTH_GH_ORG;
/* istanbul ignore next */
this.authGHRepo = process.env.AUTH_GH_REPO;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is the env var you're setting in terraform?

this.awsRegion = process.env.AWS_REGION || 'us-east-1';
/* istanbul ignore next */
this.awsRegionInstances = process.env.AWS_REGION_INSTANCES?.split(',').filter((w) => w.length > 0) || [];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ export class Metrics {

/* istanbul ignore next */
protected getMetricType(metric: string): StandardUnit {
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
if (Metrics.baseMetricTypes.has(metric)) return Metrics.baseMetricTypes.get(metric)!;
if (metric.endsWith('.wallclock')) return 'Milliseconds';
if (metric.endsWith('.runningWallclock')) return 'Seconds';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -263,14 +263,21 @@ export function backwardCompatibleGetRepoForgetRunnerTypes(ec2runner: RunnerInfo
return getRepo(ec2runner.repo as string);
}

export function backwardCompatibleGetAuthRepoForGetRunnerTypes(ec2runner: RunnerInfo): Repo {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit on the term "auth" in AuthRepo (see comment on variables.tf)

if (Config.Instance.authGHOrg) {
return getRepo(Config.Instance.authGHOrg, Config.Instance.authGHRepo);
}
return ec2runner.repo ? getRepo(ec2runner.repo as string) : { owner: ec2runner.org as string, repo: '' };
}

export async function isEphemeralRunner(ec2runner: RunnerInfo, metrics: ScaleDownMetrics): Promise<boolean> {
if (ec2runner.runnerType === undefined) {
return false;
}

const runnerTypes = await getRunnerTypes(
backwardCompatibleGetRepoForgetRunnerTypes(ec2runner),
ec2runner.repo ? getRepo(ec2runner.repo as string) : { owner: ec2runner.org as string, repo: '' },
backwardCompatibleGetAuthRepoForGetRunnerTypes(ec2runner),
metrics,
);
return runnerTypes.get(ec2runner.runnerType)?.is_ephemeral ?? false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,16 @@ export async function scaleUpChron(metrics: ScaleUpChronMetrics): Promise<void>
// 3. For each runner queued for longer than the minimum delay, try to scale it up

try {
const repo = getRepo(Config.Instance.scaleConfigOrg, Config.Instance.scaleConfigRepo);
const authRepo = getRepo(
Config.Instance.authGHOrg || Config.Instance.scaleConfigOrg,
Config.Instance.authGHRepo || Config.Instance.scaleConfigRepo,
);
const scaleConfigRepo = getRepo(Config.Instance.scaleConfigOrg, Config.Instance.scaleConfigRepo);
const validRunnerTypes = await getRunnerTypes(
// The auth repo falls back to the scale-config repo unless authGHOrg/authGHRepo are configured,
// so the two parameters only differ when an explicit auth org/repo is set
repo,
repo,
scaleConfigRepo,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the comment up above this line should be updated

authRepo,
metrics,
Config.Instance.scaleConfigRepoPath,
);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { Metrics, ScaleUpMetrics } from './metrics';
import { Repo, getRepoKey, sleep } from './utils';
import { Repo, getRepo, getRepoKey, sleep } from './utils';
import { RunnerType, RunnerInputParameters, createRunner, tryReuseRunner, NoRunnersAvailable } from './runners';
import {
createRegistrationTokenOrg,
Expand Down Expand Up @@ -64,11 +64,12 @@ export async function scaleUp(
console.error(`Error getting GitHub rate limit: ${e}`);
}

const scaleConfigRepo = {
owner: Config.Instance.scaleConfigOrg || repo.owner,
repo: Config.Instance.scaleConfigRepo || repo.repo,
};
const runnerTypes = await getRunnerTypes(scaleConfigRepo, repo, metrics);
const authRepo = getRepo(Config.Instance.authGHOrg || repo.owner, Config.Instance.authGHRepo || repo.repo);
const scaleConfigRepo = getRepo(
Config.Instance.scaleConfigOrg || repo.owner,
Config.Instance.scaleConfigRepo || repo.repo,
);
const runnerTypes = await getRunnerTypes(scaleConfigRepo, authRepo, metrics);
/* istanbul ignore next */
const runnerLabels = payload?.runnerLabels ?? Array.from(runnerTypes.keys());

Expand Down Expand Up @@ -211,6 +212,21 @@ async function createRunnerConfigArgument(
}

async function shouldSkipForRepo(repo: Repo, metrics: Metrics): Promise<boolean> {
// Guard: when an org is pinned through Config.Instance.authGHOrg, skip repos owned by any other org.
// The message is built from two template literals joined with `+`; keeping the `" +` inside a single
// backtick string would emit the quote, plus sign and line break verbatim in the log.
if (Config.Instance.authGHOrg && Config.Instance.authGHOrg !== repo.owner) {
  console.warn(
    `Skipping scaleUp for repo '${repo.owner}/${repo.repo}' as it is not part ` +
      `of the installed org '${Config.Instance.authGHOrg}'`,
  );
  return true;
}
// Guard: when a specific repo is pinned through Config.Instance.authGHRepo, skip every other repo.
if (Config.Instance.authGHRepo && Config.Instance.authGHRepo !== repo.repo) {
  console.warn(
    `Skipping scaleUp for repo '${repo.owner}/${repo.repo}' as it is not part ` +
      `of the installed repo '${Config.Instance.authGHRepo}'`,
  );
  return true;
}

if (Config.Instance.mustHaveIssuesLabels) {
for (let i = 0; i < Config.Instance.mustHaveIssuesLabels.length; i++) {
const label = Config.Instance.mustHaveIssuesLabels[i];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { SQSRecord } from 'aws-lambda';
import { expBackOff } from './utils';
import { Metrics } from './metrics';

function getQueueUrl(evt: SQSRecord, sqs: SQS) {
function getQueueUrl(evt: SQSRecord) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: There seem to be a lot of lint changes here. This PR would be way easier to review if split into a "lint only" changes PR and a "new functionality" PR

const splitARN = evt.eventSourceARN.split(':');
// arn:aws:sqs:region:account-id:queue-name
const region = splitARN[3];
Expand Down Expand Up @@ -44,7 +44,7 @@ export async function sqsChangeMessageVisibilityBatch(
) {
const sqs: SQS = new SQS();

const queueUrl = getQueueUrl(events[0], sqs);
const queueUrl = getQueueUrl(events[0]);
const parameters = {
Entries: events.map((evt) => {
return {
Expand Down Expand Up @@ -72,7 +72,7 @@ export async function sqsChangeMessageVisibilityBatch(
export async function sqsDeleteMessageBatch(metrics: Metrics, events: Array<SQSRecord>) {
const sqs: SQS = new SQS();

const queueUrl = getQueueUrl(events[0], sqs);
const queueUrl = getQueueUrl(events[0]);
const parameters = {
Entries: events.map((evt) => {
return {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,6 @@ describe('./utils', () => {
it('returns the repo from two strings', () => {
expect(getRepo('owner', 'repo')).toEqual({ owner: 'owner', repo: 'repo' });
});

it('throws error when repoDef is not in the correct format', () => {
expect(() => {
getRepo('owner/repo/invalid');
}).toThrowError();
});
});

describe('groupBy', () => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,6 @@ export function getRepo(repoDef: string, repoName?: string): Repo {
}

const repoArr = repoDef.split('/');
if (repoArr.length != 2) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this no longer needed?

throw Error('getRepo: repoDef string must be in the format "owner/repo_name"');
}
return { owner: repoArr[0], repo: repoArr[1] };
} catch (e) {
console.error(`[getRepo]: ${e}`);
Expand Down
2 changes: 2 additions & 0 deletions terraform-aws-github-runner/modules/runners/scale-down.tf
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ resource "aws_lambda_function" "scale_down" {

environment {
variables = {
AUTH_GH_APP = var.auth_gh_app
AUTH_GH_ORG = var.auth_gh_org
AWS_REGION_INSTANCES = join(",", var.aws_region_instances)
DATETIME_DEPLOY = local.datetime_deploy
ENABLE_ORGANIZATION_RUNNERS = var.enable_organization_runners
Expand Down
6 changes: 4 additions & 2 deletions terraform-aws-github-runner/modules/runners/scale-up-chron.tf
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ resource "aws_lambda_function" "scale_up_chron" {
# changes should reflect the changes in scale-up.tf
environment {
variables = {
AUTH_GH_APP = var.auth_gh_app
AUTH_GH_ORG = var.auth_gh_org
CANT_HAVE_ISSUES_LABELS = join(",", var.cant_have_issues_labels)
DATETIME_DEPLOY = local.datetime_deploy
ENABLE_ORGANIZATION_RUNNERS = var.enable_organization_runners
Expand All @@ -53,16 +55,16 @@ resource "aws_lambda_function" "scale_up_chron" {
MUST_HAVE_ISSUES_LABELS = join(",", var.must_have_issues_labels)
REDIS_ENDPOINT = var.redis_endpoint
REDIS_LOGIN = var.redis_login
RETRY_SCALE_UP_CHRON_RECORD_QUEUE_URL = var.sqs_build_queue_retry.url
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

random: were these automated changes via some lint tool? Or did you change it manually?

Total side comment, but it would be nice if the automation was part of CI!

RETRY_SCALE_UP_RECORD_DELAY_S = "60"
RETRY_SCALE_UP_RECORD_JITTER_PCT = "0.5"
RETRY_SCALE_UP_CHRON_RECORD_QUEUE_URL = var.sqs_build_queue_retry.url
RUNNER_EXTRA_LABELS = var.runner_extra_labels
SCALE_CONFIG_ORG = var.scale_config_org
SCALE_CONFIG_REPO = var.scale_config_repo
SCALE_CONFIG_REPO_PATH = var.scale_config_repo_path
SECRETSMANAGER_SECRETS_ID = var.secretsmanager_secrets_id
SCALE_UP_CHRON_HUD_QUERY_URL = var.retry_scale_up_chron_hud_query_url
SCALE_UP_MAX_QUEUE_TIME_MINUTES = 30
SECRETSMANAGER_SECRETS_ID = var.secretsmanager_secrets_id

AWS_REGIONS_TO_VPC_IDS = join(
",",
Expand Down
4 changes: 3 additions & 1 deletion terraform-aws-github-runner/modules/runners/scale-up.tf
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ resource "aws_lambda_function" "scale_up" {
environment {
# changes should reflect the changes in scale-up-chron.tf
variables = {
AUTH_GH_ORG = var.auth_gh_org
AUTH_GH_APP = var.auth_gh_app
CANT_HAVE_ISSUES_LABELS = join(",", var.cant_have_issues_labels)
DATETIME_DEPLOY = local.datetime_deploy
ENABLE_ORGANIZATION_RUNNERS = var.enable_organization_runners
Expand All @@ -65,9 +67,9 @@ resource "aws_lambda_function" "scale_up" {
MUST_HAVE_ISSUES_LABELS = join(",", var.must_have_issues_labels)
REDIS_ENDPOINT = var.redis_endpoint
REDIS_LOGIN = var.redis_login
RETRY_SCALE_UP_CHRON_RECORD_QUEUE_URL = var.sqs_build_queue_retry.url
RETRY_SCALE_UP_RECORD_DELAY_S = "60"
RETRY_SCALE_UP_RECORD_JITTER_PCT = "0.5"
RETRY_SCALE_UP_CHRON_RECORD_QUEUE_URL = var.sqs_build_queue_retry.url
RUNNER_EXTRA_LABELS = var.runner_extra_labels
SCALE_CONFIG_ORG = var.scale_config_org
SCALE_CONFIG_REPO = var.scale_config_repo
Expand Down
12 changes: 12 additions & 0 deletions terraform-aws-github-runner/modules/runners/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,18 @@ variable "aws_region_instances" {
type = list(string)
}

variable "auth_gh_app" {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this var needed? Don't we already get this token somehow?

description = "GitHub App authentication token."
type = string
default = ""
}

variable "auth_gh_org" {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

naming nit: can we have a name without "auth" in it? Maybe "ci_github_org" or just "github_org"

The auth part is rather incidental here (a gh app can be authorized to affect multiple github orgs). The key thing we want to know is which github org is this particular autoscaler fleet supposed to be managing.

description = "GitHub organization for the runners."
type = string
default = ""
}

variable "vpc_ids" {
description = "The list of vpc_id for aws_region. keys; 'vpc' 'region'"
type = list(map(string))
Expand Down
12 changes: 12 additions & 0 deletions terraform-aws-github-runner/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,18 @@ variable "aws_region_instances" {
type = list(string)
}

# GitHub App authentication token; the root main.tf forwards it to the "runners" module as auth_gh_app.
# Optional: defaults to an empty string.
# NOTE(review): the lambda Config shown in this change reads AUTH_GH_ORG and AUTH_GH_REPO, not
# AUTH_GH_APP — confirm this value is actually consumed and whether it should be marked sensitive.
variable "auth_gh_app" {
description = "GitHub App authentication token."
type = string
default = ""
}

# GitHub organization the runners belong to; the root main.tf forwards it to the "runners" module
# as auth_gh_org.
# Optional: defaults to an empty string.
# NOTE(review): presumably an empty value disables the org checks in the lambdas, since they test
# authGHOrg for truthiness — confirm.
variable "auth_gh_org" {
description = "GitHub organization for the runners."
type = string
default = ""
}

variable "vpc_ids" {
description = "The list of vpc_id for aws_region. keys: 'vpc' 'region'"
type = list(map(string))
Expand Down
Loading