Skip to content

qemu_guest_agent: Add new api support 'guest-get-load' #4309

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions qemu/tests/cfg/qemu_guest_agent.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,13 @@
remove_image_image1 = yes
cmd_run_debugview = 'start WIN_UTILS:\Debugviewconsole.exe -d C:\debug.dbglog'
cmd_check_string_VSS = 'type C:\debug.dbglog | findstr /i "QEMU Guest Agent VSS Provider\[[0-9]*\]"'
- check_get_load:
gagent_check_type = get_load
cmd_get_load = "cat /proc/loadavg |awk '{print $1,$2,$3}'"
cmd_install_stressng = "dnf -y install stress-ng"
cmd_run_stress = "stress-ng --cpu 8 --cpu-load 80 --timeout 30 --quiet &"
Windows:
cmd_run_stress = powershell "1..12 | ForEach-Object { Start-Job -ScriptBlock { while ($true) { [math]::sqrt(999999) } } }"
variants:
- virtio_serial:
gagent_serial_type = virtio
Expand Down
160 changes: 160 additions & 0 deletions qemu/tests/qemu_guest_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -1850,6 +1850,166 @@ def ip_addr_check(session, mac_addr, ret_list, if_index, if_name):
if session_serial:
session_serial.close()

@error_context.context_aware
def gagent_check_get_load(self, test, params, env):
"""
Test guest-get-load command functionality.

Steps:
1) Get initial load values and verify qga/guest match
2) Start stress test and verify load increases
3) Stop stress test and verify load decreases

:param test: kvm test object
:param params: Dictionary with test parameters
:param env: Dictionary with test environment
"""

def _get_load_stats(session, get_guest=True):
"""
Get load statistics from either guest OS or QGA.
Returns tuple of (1min, 5min, 15min) load values.
"""
if get_guest:
try:
loads = session.cmd_output(params["cmd_get_load"]).strip().split()
return tuple(round(float(x), 2) for x in loads[:3])
except (IndexError, ValueError) as e:
test.error(f"Failed to get guest load stats: {e}")
else:
try:
loads = self.gagent.get_load()
load_keys = ("load1m", "load5m", "load15m")
return tuple(round(float(loads[k]), 2) for k in load_keys)
except (KeyError, ValueError) as e:
test.error(f"Failed to get QGA load stats: {e}")

def _verify_load_values(qga_vals, guest_vals, check_type="match"):
"""
Compare load values between QGA and guest OS.
Also verifies if values changed as expected.
"""
errors = []
periods = ["1-minute", "5-minute", "15-minute"]

for period, qga, guest in zip(periods, qga_vals, guest_vals):
if abs(qga - guest) > 0.5:
errors.append(
f"{period} load mismatch: guest={guest:.2f}, qga={qga:.2f}"
)

# Only check load1m for increase/decrease
if check_type != "match" and prev_values:
qga_1m = qga_vals[0]
guest_1m = guest_vals[0]
prev_qga_1m = prev_values["qga"][0]
prev_guest_1m = prev_values["guest"][0]

if check_type == "increase":
if qga_1m <= prev_qga_1m or guest_1m <= prev_guest_1m:
errors.append(
"1-minute load did not increase as expected:\n"
f"QGA: {prev_qga_1m:.2f} -> {qga_1m:.2f}\n"
f"Guest: {prev_guest_1m:.2f} -> {guest_1m:.2f}"
)
elif check_type == "decrease":
if qga_1m >= prev_qga_1m or guest_1m >= prev_guest_1m:
errors.append(
"1-minute load did not decrease as expected:\n"
f"QGA: {prev_qga_1m:.2f} -> {qga_1m:.2f}\n"
f"Guest: {prev_guest_1m:.2f} -> {guest_1m:.2f}"
)

return errors

def _log_load_values(guest_vals, qga_vals, phase):
"""Log load values in a consistent format"""
LOG_JOB.info(
"%s load averages:\nGuest OS: %s\nQGA: %s",
phase,
[f"{x:.2f}" for x in guest_vals],
[f"{x:.2f}" for x in qga_vals],
)

session = self._get_session(params, self.vm)
self._open_session_list.append(session)
prev_values = None

if params.get("os_type") == "windows":
error_context.context("Get load info for Windows", LOG_JOB.info)
try:
# Get initial load values
load_info = self.gagent.get_load()
# Check if all required fields exist
for key in ["load1m", "load5m", "load15m"]:
if key not in load_info:
test.fail(f"Missing {key} in guest-get-load return value")
initial_load = load_info["load1m"]
LOG_JOB.info("Initial load info from guest-agent: %s", load_info)

# Start CPU stress test
error_context.context("Start CPU stress test", LOG_JOB.info)
session.cmd(params["cmd_run_stress"])
time.sleep(10)

# Get load values after stress
load_info = self.gagent.get_load()
stress_load = load_info["load1m"]
LOG_JOB.info("Load info after stress: %s", load_info)

# Verify load value changed
if stress_load <= initial_load:
test.fail(
f"Load value did not increase after CPU stress:"
f" before={initial_load}, after={stress_load}"
)
LOG_JOB.info(
"Load value increased as expected:" " before=%s, after=%s",
initial_load,
stress_load,
)
except guest_agent.VAgentCmdError as e:
test.fail(f"guest-get-load command failed: {e}")
else:
# Initial load check
error_context.context("Check initial load average info", LOG_JOB.info)
guest_vals = _get_load_stats(session)
qga_vals = _get_load_stats(session, False)
prev_values = {"guest": guest_vals, "qga": qga_vals}

_log_load_values(guest_vals, qga_vals, "Initial")

if errors := _verify_load_values(qga_vals, guest_vals):
test.fail("Initial load check failed:\n" + "\n".join(errors))

# Stress test
error_context.context("Starting CPU stress test", LOG_JOB.info)
s, o = session.cmd_status_output(params["cmd_install_stressng"])
if s != 0:
test.error(f"Failed to install stress-ng: {o}")
session.cmd(params["cmd_run_stress"])
time.sleep(25)

guest_vals = _get_load_stats(session)
qga_vals = _get_load_stats(session, False)

_log_load_values(guest_vals, qga_vals, "Under stress")

if errors := _verify_load_values(qga_vals, guest_vals, "increase"):
test.fail("Stress test load check failed:\n" + "\n".join(errors))

prev_values = {"guest": guest_vals, "qga": qga_vals}

# sleep (60) wait for the stress-ng terminated.
time.sleep(60)
guest_vals = _get_load_stats(session)
qga_vals = _get_load_stats(session, False)

_log_load_values(guest_vals, qga_vals, "After stress")

if errors := _verify_load_values(qga_vals, guest_vals, "decrease"):
test.fail("Post-stress load check failed:\n" + "\n".join(errors))

@error_context.context_aware
def gagent_check_reboot_shutdown(self, test, params, env):
"""
Expand Down