Skip to content

Commit d04ae05

Browse files
committed
Add a case for packet loss when slowly reusing memory buffers
Under various conditions, when the host-to-device packet rate is high, we lose packets in QEMU due to a lack of guest-allocated buffers. See also virtio-win/kvm-guest-drivers-windows#1012. Signed-off-by: wji <[email protected]>
1 parent 3f60e13 commit d04ae05

File tree

2 files changed

+179
-0
lines changed

2 files changed

+179
-0
lines changed
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Variant for the netkvm_buffer_shortage test: two Windows guests with
# virtio-net exchange traffic while the server's NetKVM driver parameter
# named in `param_name` is stepped through `param_values`.
- netkvm_buffer_shortage:
    virt_test_type = qemu
    type = netkvm_buffer_shortage

    # Restrict the variant to Windows guests using virtio-net.
    only Windows
    only virtio_net

    vhost = on
    timeout = 360
    cdroms += " virtio"

    # A second guest (vm2) is declared but not started by the framework;
    # the test script starts it itself.
    vms += " vm2"
    image_snapshot = yes
    start_vm = yes
    start_vm_vm2 = no

    # One queue per vCPU.
    smp = 2
    queues = ${smp}
    vectors = 1024

    # Traffic generator scripts and where they are copied inside the guest.
    port_num = 12345
    copy_dest = "C:\"
    server_script = "server.py"
    client_script = "client.py"
    check_live_python = "tasklist | findstr /i python"
    copy_all_cmd = 'xcopy "WIN_UTILS:\packet_loss_scripts\*" ${copy_dest}'

    # psutil wheel matching the guest architecture; must be set before pip_cmd
    # so that ${psutil_whl} expands.
    i386:
        psutil_whl = "psutil-6.1.1-cp37-abi3-win32.whl"
    x86_64:
        psutil_whl = "psutil-6.1.1-cp37-abi3-win_amd64.whl"
    pip_cmd = "py -m pip install ${psutil_whl}"

    dest_location = "pushd ${copy_dest}"
    server_cmd = "start cmd /c py ${server_script} ${port_num}"
    # %s is filled in by the test with the server VM address; the meaning of
    # the first argument (99999) is defined by client.py.
    client_cmd = "start cmd /c py ${client_script} 99999 %s ${port_num}"

    # NetKVM driver parameter under test and the values it is stepped through.
    param_name = "MinRxBufferPercent"
    param_values = "0 25 50 75 100"

qemu/tests/netkvm_buffer_shortage.py

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
import re
2+
3+
from virttest import env_process, error_context, utils_misc, utils_net
4+
5+
6+
@error_context.context_aware
def run(test, params, env):
    """
    Simulate high packet rate between host and device by running Python scripts
    on both server and client side. This test is executed on two VM guests:

    1) Start a VM guest as the server.
    2) Start a VM guest as the client.
    3) Simulate buffer allocation issues on the server node.
    4) Use a Python script to connect the client to the server.
    5) Adjust the MinRxBufferPercent parameter to work around the issue.
    6) Ensure no BSOD occurs on the client node.

    :param test: QEMU test object.
    :param params: Dictionary of test parameters.
    :param env: Dictionary of test environment details.
    """

    def analyze_ping_results(session, dest, count, timeout):
        """
        Conduct a ping test to check the packet loss on slow memory buffer reallocation.

        :param session: The session to execute the ping command.
        :param dest: Destination IP address to ping.
        :param count: Number of ICMP packets to send.
        :param timeout: Timeout for the ping command.
        :return: Packet loss percentage reported by ping, as an int.
        """
        status, output = utils_net.ping(
            dest=dest, session=session, count=count, timeout=timeout
        )
        if status != 0:
            test.fail("Ping failed, status: %s, output: %s" % (status, output))
        match = re.search(r"(\d+)% loss", output)
        if match is None:
            # Report unparsable output as a test failure instead of letting a
            # None value reach the caller's arithmetic.
            test.fail("Could not parse packet loss from ping output: %s" % output)
        return int(match.group(1))

    def modify_and_analyze_params_result(vm, param_name, value):
        """
        Set netkvm driver parameter and verify if it was correctly set.

        :param vm: Target VM.
        :param param_name: Parameter name to be modified.
        :param value: Value to set.
        """
        utils_net.set_netkvm_param_value(vm, param_name, value)
        cur_value = utils_net.get_netkvm_param_value(vm, param_name)
        if cur_value != value:
            test.fail(f"Failed to set '{param_name}' to '{value}'")

    def check_and_restart_port(session, script_to_run):
        """
        Check if a Python process is running. If not, restart the appropriate script.

        :param session: The session to execute commands.
        :param script_to_run: The command to run the Python script; a command
                              containing "server" selects the server branch,
                              anything else the client branch.
        """
        check_live_python = params.get("check_live_python")
        status, _ = session.cmd_status_output(check_live_python, timeout=1200)
        if status == 0:
            # A Python process is already listed; nothing to restart.
            return

        # Change to the directory the scripts were copied to, in the same
        # session that will launch the script.
        session.cmd(dest_location)
        if "server" in script_to_run:
            error_context.context("Run server script on the server node", test.log.info)
            status, _ = session.cmd_status_output(server_cmd, timeout=1200)
            if status != 0:
                test.fail("The server node failed to start.")
        else:
            error_context.context("Run client script on the client node", test.log.info)
            status, _ = session.cmd_status_output(
                client_cmd % s_vm_ip, timeout=1200
            )
            if status != 0:
                test.fail("The client could not connect to the server node.")

    timeout = params.get_numeric("login_timeout", 360)
    param_name = params.get("param_name")
    param_values = params.get("param_values")
    dest_location = params.get("dest_location")
    copy_all_cmd = params.get("copy_all_cmd")
    pip_cmd = params.get("pip_cmd")
    server_cmd = params.get("server_cmd")
    client_cmd = params.get("client_cmd")

    # The first listed VM acts as the server, the second as the client.
    # Take whole whitespace-separated names so that a server name that is a
    # prefix of the client name (or extra VMs) cannot corrupt the result.
    vm_names = params["vms"].split()
    s_vm_name = vm_names[0]
    c_vm_name = vm_names[1]

    s_vm = env.get_vm(s_vm_name)
    s_vm.verify_alive()
    s_session = s_vm.wait_for_serial_login(timeout=timeout)
    c_session = None
    try:
        s_vm_ip = s_vm.get_address()

        # The client VM is started here rather than by the framework.
        c_vm_params = params.object_params(c_vm_name)
        c_vm_params["nic_extra_params_nic1"] = ""
        c_vm_params["start_vm"] = "yes"
        env_process.preprocess_vm(test, c_vm_params, env, c_vm_name)
        c_vm = env.get_vm(c_vm_name)
        c_vm.verify_alive()
        c_session = c_vm.wait_for_serial_login(timeout=timeout)
        c_vm_ip = c_vm.get_address()

        # Copy and install dependencies. The copy command template is resolved
        # separately for each guest so the client does not inherit the drive
        # letter that was substituted for the server.
        s_session.cmd(dest_location)
        s_session.cmd(utils_misc.set_winutils_letter(s_session, copy_all_cmd))

        c_session.cmd(dest_location)
        c_session.cmd(utils_misc.set_winutils_letter(c_session, copy_all_cmd))
        c_session.cmd(pip_cmd)

        # Run the packet loss simulation with different buffer settings
        ping_results = []
        error_context.context(
            "Modify NIC parameters on the server and monitor packet loss", test.log.info
        )
        for value in param_values.split():
            modify_and_analyze_params_result(
                vm=s_vm, param_name=param_name, value=value
            )
            check_and_restart_port(session=s_session, script_to_run=server_cmd)
            check_and_restart_port(session=c_session, script_to_run=client_cmd)
            ping_results.append(
                analyze_ping_results(
                    session=c_session, dest=s_vm_ip, count=100, timeout=timeout
                )
            )

        error_context.context("Analyze ping packet loss trend", test.log.info)
        # NOTE(review): the comparison is strict, so a plateau (for example two
        # consecutive 0% readings after a non-zero one) fails the test.
        # Confirm whether a non-increasing trend should be accepted instead.
        if sum(ping_results) != 0 and not all(
            earlier > later
            for earlier, later in zip(ping_results, ping_results[1:])
        ):
            test.fail(
                "With parameter changes, packet loss should decrease progressively."
            )

        # Final validation on client side (no BSOD)
        error_context.context("Verify no BSOD on the client node", test.log.info)
        for value in param_values.split():
            modify_and_analyze_params_result(
                vm=c_vm, param_name=param_name, value=value
            )
            # NOTE(review): this pings the client's own address from the client
            # session; confirm whether the server address was intended.
            status, output = utils_net.ping(
                dest=c_vm_ip, session=c_session, count=10, timeout=60
            )
            if status != 0:
                test.fail("Ping failed, status: %s, output: %s" % (status, output))
    finally:
        # Release the serial sessions even when the test fails part-way.
        for open_session in (c_session, s_session):
            if open_session:
                open_session.close()

0 commit comments

Comments
 (0)