From 5e409274d4de407a7961b98fb0fe1c0465acbe7c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=EC=A0=84=EC=9B=85=EC=9E=AC?=
Date: Tue, 7 Jan 2025 17:02:34 +0900
Subject: [PATCH] DTC-8156 add ansible monitor function

---
 pkg/aws/ssm.go           | 142 ++++++++++++++++++++++++++++++++++++++++
 pkg/deployer/deployer.go |  16 +++++
 2 files changed, 158 insertions(+)

diff --git a/pkg/aws/ssm.go b/pkg/aws/ssm.go
index 5f3f15c..a92a247 100644
--- a/pkg/aws/ssm.go
+++ b/pkg/aws/ssm.go
@@ -17,10 +17,15 @@ limitations under the license.
 package aws
 
 import (
+	"fmt"
+	"strings"
+	"time"
+
 	"github.com/aws/aws-sdk-go/aws"
 	"github.com/aws/aws-sdk-go/aws/client"
 	"github.com/aws/aws-sdk-go/aws/credentials"
 	"github.com/aws/aws-sdk-go/service/ssm"
+	"github.com/sirupsen/logrus"
 )
 
 type SSMClient struct {
@@ -58,3 +63,140 @@ func (s SSMClient) SendCommand(target []*string, commands []*string) bool {
 
 	return true
 }
+
+// MonitorAnsibleLog waits for the Ansible run on instanceId to finish and
+// reports whether every host in its PLAY RECAP succeeded.
+//
+// It sends one AWS-RunShellScript command that blocks on the instance until
+// "PLAY RECAP" appears in /var/log/ansible.log (or the wait times out) and
+// then prints the end of the log. The command must terminate on its own: the
+// output is only read once the invocation reports Success, which a
+// never-ending "tail -f" cannot reach.
+//
+// It returns (true, nil) on success and (false, err) with a non-nil error
+// otherwise.
+//
+// NOTE(review): a recap left in the log by an earlier run also ends the wait;
+// confirm the log is fresh on newly launched instances.
+func (s SSMClient) MonitorAnsibleLog(instanceId string) (bool, error) {
+	// waitSeconds bounds the on-instance wait; tailBytes keeps the printed log
+	// below the 24,000 characters SSM returns for stdout.
+	const (
+		logPath      = "/var/log/ansible.log"
+		waitSeconds  = 1800
+		tailBytes    = 20000
+		pollInterval = 5 * time.Second
+	)
+
+	script := fmt.Sprintf(
+		`timeout %d sh -c 'until grep -q "PLAY RECAP" %s 2>/dev/null; do sleep 5; done'; tail -c %d %s`,
+		waitSeconds, logPath, tailBytes, logPath)
+
+	output, err := s.Client.SendCommand(&ssm.SendCommandInput{
+		DocumentName: aws.String("AWS-RunShellScript"),
+		InstanceIds:  []*string{aws.String(instanceId)},
+		Parameters: map[string][]*string{
+			"commands": {aws.String(script)},
+		},
+	})
+	if err != nil {
+		return false, err
+	}
+
+	// Allow the on-instance wait plus some slack for SSM scheduling.
+	deadline := time.Now().Add(waitSeconds*time.Second + 5*time.Minute)
+	for time.Now().Before(deadline) {
+		time.Sleep(pollInterval)
+
+		invocation, err := s.Client.GetCommandInvocation(&ssm.GetCommandInvocationInput{
+			CommandId:  output.Command.CommandId,
+			InstanceId: aws.String(instanceId),
+		})
+		if err != nil {
+			// Right after SendCommand the invocation may not be registered yet.
+			if coded, ok := err.(interface{ Code() string }); ok && coded.Code() == ssm.ErrCodeInvocationDoesNotExist {
+				continue
+			}
+			return false, err
+		}
+
+		switch status := aws.StringValue(invocation.Status); status {
+		case ssm.CommandInvocationStatusSuccess:
+			return evaluateAnsibleLog(instanceId, aws.StringValue(invocation.StandardOutputContent))
+		case ssm.CommandInvocationStatusPending,
+			ssm.CommandInvocationStatusInProgress,
+			ssm.CommandInvocationStatusDelayed:
+			// Still running; poll again.
+		default:
+			return false, fmt.Errorf("ansible log command on %s ended with status %s: %s",
+				instanceId, status, aws.StringValue(invocation.StandardErrorContent))
+		}
+	}
+
+	return false, fmt.Errorf("timed out waiting for ansible log command on %s", instanceId)
+}
+
+// evaluateAnsibleLog logs the fetched log tail and judges the run by its
+// PLAY RECAP section.
+func evaluateAnsibleLog(instanceId, content string) (bool, error) {
+	logrus.Infof("\n%s", content)
+
+	if !strings.Contains(content, "PLAY RECAP") {
+		return false, fmt.Errorf("ansible did not reach PLAY RECAP on %s", instanceId)
+	}
+
+	recap := extractPlayRecap(content)
+	logrus.Infof("Ansible Execution Completed:\n%s", recap)
+
+	if !recapSucceeded(recap) {
+		return false, fmt.Errorf("ansible execution failed: %s", recap)
+	}
+
+	return true, nil
+}
+
+// recapSucceeded reports whether recap carries both failed= and unreachable=
+// counters and every occurrence of them is zero. Checking each occurrence
+// avoids accepting a recap where one host shows failed=0 while another host
+// failed.
+func recapSucceeded(recap string) bool {
+	sawFailed, sawUnreachable := false, false
+	for _, field := range strings.Fields(recap) {
+		switch {
+		case strings.HasPrefix(field, "failed="):
+			if field != "failed=0" {
+				return false
+			}
+			sawFailed = true
+		case strings.HasPrefix(field, "unreachable="):
+			if field != "unreachable=0" {
+				return false
+			}
+			sawUnreachable = true
+		}
+	}
+	return sawFailed && sawUnreachable
+}
+
+// extractPlayRecap returns the lines of log from the first line containing
+// "PLAY RECAP" through the next blank line (inclusive) or the end of log,
+// joined with newlines. It returns "" when log has no such line.
+func extractPlayRecap(log string) string {
+	lines := strings.Split(log, "\n")
+	var recap []string
+	inRecap := false
+
+	for _, line := range lines {
+		if strings.Contains(line, "PLAY RECAP") {
+			inRecap = true
+		}
+		if inRecap {
+			recap = append(recap, line)
+			if strings.TrimSpace(line) == "" {
+				break
+			}
+		}
+	}
+
+	return strings.Join(recap, "\n")
+}
diff --git a/pkg/deployer/deployer.go b/pkg/deployer/deployer.go
index 9b8b4c2..493cae5 100644
--- a/pkg/deployer/deployer.go
+++ b/pkg/deployer/deployer.go
@@ -958,6 +958,22 @@ func (d *Deployer) HealthChecking(config schemas.Config) (bool, error) {
 		}
 		d.Logger.Debugf("Health check target autoscaling group: %s / %s", region.Region, *asg.AutoScalingGroupName)
 
+		if len(asg.Instances) > 0 {
+			for _, instance := range asg.Instances {
+				success, err := client.SSMService.MonitorAnsibleLog(*instance.InstanceId)
+				if err != nil {
+					d.Logger.Error(fmt.Sprintf("Error monitoring ansible log: %v", err))
+					return false, err
+				}
+				if !success {
+					d.Logger.Error("Ansible execution failed")
+					// err is nil here, so return an explicit error instead of (false, nil).
+					return false, fmt.Errorf("ansible execution failed on instance %s", *instance.InstanceId)
+				}
+			}
+			d.Logger.Info("Ansible execution completed successfully")
+		}
+
 		isHealthy, err := d.Polling(region, asg, client, config.ForceManifestCapacity, isUpdate, config.DownSizingUpdate)
 		if err != nil {
 			return false, err