Skip to content

Commit c08ee32

Browse files
committed
Add a case for packet loss when slowly reusing memory buffers
Under various conditions, when the host-to-device packet rate is high, packets are lost in QEMU because of a lack of guest-allocated buffers. See also virtio-win/kvm-guest-drivers-windows#1012. Signed-off-by: wji <[email protected]>
1 parent 3f60e13 commit c08ee32

File tree

2 files changed

+171
-0
lines changed

2 files changed

+171
-0
lines changed
+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Packet-loss test for the Windows netkvm driver when RX buffers run short.
- netkvm_buffer_shortage:
    virt_test_type = qemu
    type = netkvm_buffer_shortage
    # Restrict the variant to Windows guests using a virtio-net NIC.
    only Windows
    only virtio_net
    vhost = on
    timeout = 360
    cdroms += " virtio"
    # Two guests are declared; vm2 is not started by the framework
    # (start_vm_vm2 = no) and is brought up by the test itself.
    vms += " vm2"
    image_snapshot = yes
    start_vm = yes
    start_vm_vm2 = no
    # The number of NIC queues follows the vCPU count.
    smp = 2
    queues = ${smp}
    vectors = 1024
    # Traffic-generator scripts, their location in the guest and the port
    # number passed to them on the command line.
    port_num = 12345
    copy_dest = "C:\"
    server_script = "server.py"
    client_script = "client.py"
    check_live_python = "tasklist | findstr /i python"
    copy_all_cmd = 'xcopy "WIN_UTILS:\packet_loss_scripts\*" ${copy_dest} /Y /I /Q /F'
    # The psutil wheel must match the guest architecture.
    i386:
        psutil_whl = "psutil-6.1.1-cp37-abi3-win32.whl"
    x86_64:
        psutil_whl = "psutil-6.1.1-cp37-abi3-win_amd64.whl"
    pip_cmd = "py -m pip install ${psutil_whl}"
    dest_location = "pushd ${copy_dest}"
    server_cmd = "start cmd /c py ${server_script} ${port_num}"
    # The %s placeholder is filled with the server address by the test.
    client_cmd = "start cmd /c py ${client_script} 99999 %s ${port_num}"
    # Driver parameter exercised by the test and the values it is set to.
    param_name = "MinRxBufferPercent"
    param_values = "0 25 50 75 100"

qemu/tests/netkvm_buffer_shortage.py

+140
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
import re
2+
3+
from virttest import env_process, error_context, utils_misc, utils_net
4+
5+
6+
@error_context.context_aware
def run(test, params, env):
    """
    Simulate high packet rate between host and device by running Python scripts
    on both server and client side. This test is executed on two VM guests:

    1) Start a VM guest as the server.
    2) Start a VM guest as the client.
    3) Simulate buffer allocation issues on the server node.
    4) Use a Python script to connect the client to the server.
    5) Adjust the MinRxBufferPercent parameter to work around the issue.
    6) Ensure no BSOD occurs on the client node.

    :param test: QEMU test object.
    :param params: Dictionary of test parameters.
    :param env: Dictionary of test environment details.
    """

    def analyze_ping_results(session, dest, count, timeout):
        """
        Conduct a ping test to check the packet loss on slow memory buffer
        reallocation.

        The test is failed if the ping command itself fails or if no
        "<N>% loss" figure can be found in its output.

        :param session: The session to execute the ping command.
        :param dest: Destination IP address to ping.
        :param count: Number of ICMP packets to send.
        :param timeout: Timeout for the ping command.
        :return: Packet loss percentage as an integer.
        """
        status, output = utils_net.ping(
            dest=dest, session=session, count=count, timeout=timeout
        )
        if status != 0:
            test.fail("Ping failed, status: %s, output: %s" % (status, output))
        match = re.search(r"(\d+)% loss", output)
        if match is None:
            # Fail with a clear message instead of letting a None value
            # blow up later when it is converted to an integer.
            test.fail("Unable to parse packet loss from ping output: %s" % output)
        return int(match.group(1))

    def modify_and_analyze_params_result(vm, param_name, value):
        """
        Set netkvm driver parameter and verify if it was correctly set.

        :param vm: Target VM.
        :param param_name: Parameter name to be modified.
        :param value: Value to set.
        """
        utils_net.set_netkvm_param_value(vm, param_name, value)
        cur_value = utils_net.get_netkvm_param_value(vm, param_name)
        if cur_value != value:
            test.fail(f"Failed to set '{param_name}' to '{value}'")

    def check_and_restart_port(session, script_to_run):
        """
        Check if a Python process is running. If not, restart the appropriate
        script.

        The server script is started when "server" occurs in script_to_run,
        otherwise the client script is started against the server address.

        :param session: The session to execute commands.
        :param script_to_run: The command to run the Python script.
        """
        status, _ = session.cmd_status_output(check_live_python, timeout=1200)
        if status == 0:
            # A Python process is already alive; nothing to restart.
            return
        # Always operate on the session that was handed in, so the working
        # directory is changed in the same shell that launches the script.
        session.cmd(dest_location)
        if "server" in script_to_run:
            error_context.context("Run server script on the server node", test.log.info)
            status, _ = session.cmd_status_output(server_cmd, timeout=1200)
            if status != 0:
                test.fail("The server node failed to start.")
        else:
            error_context.context("Run client script on the client node", test.log.info)
            status, _ = session.cmd_status_output(client_cmd % s_vm_ip, timeout=1200)
            if status != 0:
                test.fail("The client could not connect to the server node.")

    timeout = params.get_numeric("login_timeout", 360)
    param_name = params.get("param_name")
    # Split once; the same value list drives both the server and client loops.
    param_values = params.get("param_values").split()
    dest_location = params.get("dest_location")
    copy_all_cmd = params.get("copy_all_cmd")
    pip_cmd = params.get("pip_cmd")
    server_cmd = params.get("server_cmd")
    client_cmd = params.get("client_cmd")
    check_live_python = params.get("check_live_python")

    # Take the VM names by position. Splitting the string on the first VM's
    # name breaks when that name is a substring of the second one.
    vm_names = params["vms"].split()
    if len(vm_names) < 2:
        test.error("This test requires two VMs, got: %s" % params["vms"])
    s_vm_name, c_vm_name = vm_names[0], vm_names[1]

    opened_sessions = []
    try:
        s_vm = env.get_vm(s_vm_name)
        s_vm.verify_alive()
        s_session = s_vm.wait_for_serial_login(timeout=timeout)
        opened_sessions.append(s_session)
        s_vm_ip = s_vm.get_address()

        # The client VM is started here, with start_vm overridden to "yes"
        # and the extra parameters of its first NIC cleared.
        c_vm_params = params.object_params(c_vm_name)
        c_vm_params["nic_extra_params_nic1"] = ""
        c_vm_params["start_vm"] = "yes"
        env_process.preprocess_vm(test, c_vm_params, env, c_vm_name)
        c_vm = env.get_vm(c_vm_name)
        c_vm.verify_alive()
        c_session = c_vm.wait_for_serial_login(timeout=timeout)
        opened_sessions.append(c_session)
        c_vm_ip = c_vm.get_address()

        # Copy and install dependencies
        s_session.cmd(dest_location)
        s_session.cmd(copy_all_cmd)

        c_session.cmd(dest_location)
        c_session.cmd(copy_all_cmd)
        c_session.cmd(pip_cmd)

        # Run the packet loss simulation with different buffer settings
        ping_results = []
        error_context.context(
            "Modify NIC parameters on the server and monitor packet loss",
            test.log.info,
        )
        for value in param_values:
            modify_and_analyze_params_result(
                vm=s_vm, param_name=param_name, value=value
            )
            check_and_restart_port(session=s_session, script_to_run=server_cmd)
            check_and_restart_port(session=c_session, script_to_run=client_cmd)
            ping_loss = analyze_ping_results(
                session=c_session, dest=s_vm_ip, count=100, timeout=timeout
            )
            ping_results.append(ping_loss)

        error_context.context("Analyze ping packet loss trend", test.log.info)
        if sum(ping_results) != 0:
            # NOTE(review): the comparison is strict, so a run whose loss
            # reaches a plateau (e.g. 5, 0, 0, 0, 0) is reported as a failure.
            # Confirm whether a non-increasing trend should be accepted.
            strictly_decreasing = all(
                earlier > later
                for earlier, later in zip(ping_results, ping_results[1:])
            )
            if not strictly_decreasing:
                test.fail(
                    "With parameter changes, packet loss should decrease progressively."
                )

        # Final validation on client side (no BSOD)
        error_context.context("Verify no BSOD on the client node", test.log.info)
        for value in param_values:
            modify_and_analyze_params_result(
                vm=c_vm, param_name=param_name, value=value
            )
            # The client pings its own address through its own session; a
            # failing ping is treated as the guest no longer responding.
            status, output = utils_net.ping(
                dest=c_vm_ip, session=c_session, count=10, timeout=60
            )
            if status != 0:
                test.fail("Ping failed, status: %s, output: %s" % (status, output))
    finally:
        # Release the serial sessions whether the test passed or failed.
        for opened in opened_sessions:
            opened.close()

0 commit comments

Comments
 (0)