Skip to content

Commit 92c69bb

Browse files
oshogbo authored and behlendorf committed
Add knob to disable slow io notifications
Introduce a new vdev property `VDEV_PROP_SLOW_IO_EVENTS` (`slow_io_events`) that allows users to disable notifications for slow devices. This prevents ZED and/or ZFSD from degrading the pool due to slow I/O. Signed-off-by: Mariusz Zaborski <[email protected]>
1 parent 10a78e2 commit 92c69bb

File tree

10 files changed

+123
-33
lines changed

10 files changed

+123
-33
lines changed

include/sys/fs/zfs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,7 @@ typedef enum {
385385
VDEV_PROP_TRIM_SUPPORT,
386386
VDEV_PROP_TRIM_ERRORS,
387387
VDEV_PROP_SLOW_IOS,
388+
VDEV_PROP_SLOW_IO_EVENTS,
388389
VDEV_NUM_PROPS
389390
} vdev_prop_t;
390391

include/sys/vdev_impl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,7 @@ struct vdev {
464464
uint64_t vdev_checksum_t;
465465
uint64_t vdev_io_n;
466466
uint64_t vdev_io_t;
467+
boolean_t vdev_slow_io_events;
467468
uint64_t vdev_slow_io_n;
468469
uint64_t vdev_slow_io_t;
469470
};

lib/libzfs/libzfs.abi

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6116,7 +6116,8 @@
61166116
<enumerator name='VDEV_PROP_TRIM_SUPPORT' value='49'/>
61176117
<enumerator name='VDEV_PROP_TRIM_ERRORS' value='50'/>
61186118
<enumerator name='VDEV_PROP_SLOW_IOS' value='51'/>
6119-
<enumerator name='VDEV_NUM_PROPS' value='52'/>
6119+
<enumerator name='VDEV_PROP_SLOW_IO_EVENTS' value='52'/>
6120+
<enumerator name='VDEV_NUM_PROPS' value='53'/>
61206121
</enum-decl>
61216122
<typedef-decl name='vdev_prop_t' type-id='1573bec8' id='5aa5c90c'/>
61226123
<class-decl name='zpool_load_policy' size-in-bits='256' is-struct='yes' visibility='default' id='2f65b36f'>

man/man7/vdevprops.7

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ section, below.
4545
Every vdev has a set of properties that export statistics about the vdev
4646
as well as control various behaviors.
4747
Properties are not inherited from top-level vdevs, with the exception of
48-
checksum_n, checksum_t, io_n, io_t, slow_io_n, and slow_io_t.
48+
checksum_n, checksum_t, io_n, io_t, slow_io_events, slow_io_n, and slow_io_t.
4949
.Pp
5050
The values of numeric properties can be specified using human-readable suffixes
5151
.Po for example,
@@ -126,7 +126,8 @@ Indicates if a leaf device supports trim operations.
126126
.Pp
127127
The following native properties can be used to change the behavior of a vdev.
128128
.Bl -tag -width "allocating"
129-
.It Sy checksum_n , checksum_t , io_n , io_t , slow_io_n , slow_io_t
129+
.It Sy checksum_n , checksum_t , io_n , io_t , slow_io_events , slow_io_n ,
130+
.It Sy slow_io_t
130131
Tune the fault management daemon by specifying checksum/io thresholds of <N>
131132
errors in <T> seconds, respectively.
132133
These properties can be set on leaf and top-level vdevs.

module/zcommon/zpool_prop.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,9 @@ vdev_prop_init(void)
475475
zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE,
476476
PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "FAILFAST", boolean_table,
477477
sfeatures);
478+
zprop_register_index(VDEV_PROP_SLOW_IO_EVENTS, "slow_io_events",
479+
B_TRUE, PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off",
480+
"SLOW_IO_EVENTS", boolean_table, sfeatures);
478481

479482
/* hidden properties */
480483
zprop_register_hidden(VDEV_PROP_NAME, "name", PROP_TYPE_STRING,

module/zfs/vdev.c

Lines changed: 61 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -430,32 +430,53 @@ vdev_get_nparity(vdev_t *vd)
430430
}
431431

432432
static int
433-
vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value)
433+
vdev_prop_get_objid(vdev_t *vd, uint64_t *objid)
434434
{
435-
spa_t *spa = vd->vdev_spa;
436-
objset_t *mos = spa->spa_meta_objset;
437-
uint64_t objid;
438-
int err;
439435

440436
if (vd->vdev_root_zap != 0) {
441-
objid = vd->vdev_root_zap;
437+
*objid = vd->vdev_root_zap;
442438
} else if (vd->vdev_top_zap != 0) {
443-
objid = vd->vdev_top_zap;
439+
*objid = vd->vdev_top_zap;
444440
} else if (vd->vdev_leaf_zap != 0) {
445-
objid = vd->vdev_leaf_zap;
441+
*objid = vd->vdev_leaf_zap;
446442
} else {
447443
return (EINVAL);
448444
}
449445

446+
return (0);
447+
}
448+
449+
static int
450+
vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value)
451+
{
452+
spa_t *spa = vd->vdev_spa;
453+
objset_t *mos = spa->spa_meta_objset;
454+
uint64_t objid;
455+
int err;
456+
457+
if (vdev_prop_get_objid(vd, &objid) != 0)
458+
return (EINVAL);
459+
450460
err = zap_lookup(mos, objid, vdev_prop_to_name(prop),
451461
sizeof (uint64_t), 1, value);
452-
453462
if (err == ENOENT)
454463
*value = vdev_prop_default_numeric(prop);
455464

456465
return (err);
457466
}
458467

468+
static int
469+
vdev_prop_get_bool(vdev_t *vd, vdev_prop_t prop, boolean_t *bvalue)
470+
{
471+
int err;
472+
uint64_t ivalue;
473+
474+
err = vdev_prop_get_int(vd, prop, &ivalue);
475+
*bvalue = ivalue != 0;
476+
477+
return (err);
478+
}
479+
459480
/*
460481
* Get the number of data disks for a top-level vdev.
461482
*/
@@ -717,8 +738,12 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
717738
*/
718739
vd->vdev_checksum_n = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_N);
719740
vd->vdev_checksum_t = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_T);
741+
720742
vd->vdev_io_n = vdev_prop_default_numeric(VDEV_PROP_IO_N);
721743
vd->vdev_io_t = vdev_prop_default_numeric(VDEV_PROP_IO_T);
744+
745+
vd->vdev_slow_io_events = vdev_prop_default_numeric(
746+
VDEV_PROP_SLOW_IO_EVENTS);
722747
vd->vdev_slow_io_n = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_N);
723748
vd->vdev_slow_io_t = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_T);
724749

@@ -3879,6 +3904,11 @@ vdev_load(vdev_t *vd)
38793904
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
38803905
"failed [error=%d]", (u_longlong_t)zapobj, error);
38813906

3907+
error = vdev_prop_get_bool(vd, VDEV_PROP_SLOW_IO_EVENTS,
3908+
&vd->vdev_slow_io_events);
3909+
if (error && error != ENOENT)
3910+
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
3911+
"failed [error=%d]", (u_longlong_t)zapobj, error);
38823912
error = vdev_prop_get_int(vd, VDEV_PROP_SLOW_IO_N,
38833913
&vd->vdev_slow_io_n);
38843914
if (error && error != ENOENT)
@@ -5926,15 +5956,8 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx)
59265956
/*
59275957
* Set vdev property values in the vdev props mos object.
59285958
*/
5929-
if (vd->vdev_root_zap != 0) {
5930-
objid = vd->vdev_root_zap;
5931-
} else if (vd->vdev_top_zap != 0) {
5932-
objid = vd->vdev_top_zap;
5933-
} else if (vd->vdev_leaf_zap != 0) {
5934-
objid = vd->vdev_leaf_zap;
5935-
} else {
5959+
if (vdev_prop_get_objid(vd, &objid) != 0)
59365960
panic("unexpected vdev type");
5937-
}
59385961

59395962
mutex_enter(&spa->spa_props_lock);
59405963

@@ -6111,6 +6134,13 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
61116134
}
61126135
vd->vdev_io_t = intval;
61136136
break;
6137+
case VDEV_PROP_SLOW_IO_EVENTS:
6138+
if (nvpair_value_uint64(elem, &intval) != 0) {
6139+
error = EINVAL;
6140+
break;
6141+
}
6142+
vd->vdev_slow_io_events = intval != 0;
6143+
break;
61146144
case VDEV_PROP_SLOW_IO_N:
61156145
if (nvpair_value_uint64(elem, &intval) != 0) {
61166146
error = EINVAL;
@@ -6152,6 +6182,7 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
61526182
nvpair_t *elem = NULL;
61536183
nvlist_t *nvprops = NULL;
61546184
uint64_t intval = 0;
6185+
boolean_t boolval = 0;
61556186
char *strval = NULL;
61566187
const char *propname = NULL;
61576188
vdev_prop_t prop;
@@ -6165,15 +6196,8 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
61656196

61666197
nvlist_lookup_nvlist(innvl, ZPOOL_VDEV_PROPS_GET_PROPS, &nvprops);
61676198

6168-
if (vd->vdev_root_zap != 0) {
6169-
objid = vd->vdev_root_zap;
6170-
} else if (vd->vdev_top_zap != 0) {
6171-
objid = vd->vdev_top_zap;
6172-
} else if (vd->vdev_leaf_zap != 0) {
6173-
objid = vd->vdev_leaf_zap;
6174-
} else {
6199+
if (vdev_prop_get_objid(vd, &objid) != 0)
61756200
return (SET_ERROR(EINVAL));
6176-
}
61776201
ASSERT(objid != 0);
61786202

61796203
mutex_enter(&spa->spa_props_lock);
@@ -6482,6 +6506,18 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
64826506
vdev_prop_add_list(outnvl, propname, strval,
64836507
intval, src);
64846508
break;
6509+
case VDEV_PROP_SLOW_IO_EVENTS:
6510+
err = vdev_prop_get_bool(vd, prop, &boolval);
6511+
if (err && err != ENOENT)
6512+
break;
6513+
6514+
src = ZPROP_SRC_LOCAL;
6515+
if (boolval == vdev_prop_default_numeric(prop))
6516+
src = ZPROP_SRC_DEFAULT;
6517+
6518+
vdev_prop_add_list(outnvl, propname, NULL,
6519+
boolval, src);
6520+
break;
64856521
case VDEV_PROP_CHECKSUM_N:
64866522
case VDEV_PROP_CHECKSUM_T:
64876523
case VDEV_PROP_IO_N:

module/zfs/zfs_fm.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,9 @@ vdev_prop_get_inherited(vdev_t *vd, vdev_prop_t prop)
223223
case VDEV_PROP_IO_T:
224224
propval = vd->vdev_io_t;
225225
break;
226+
case VDEV_PROP_SLOW_IO_EVENTS:
227+
propval = vd->vdev_slow_io_events;
228+
break;
226229
case VDEV_PROP_SLOW_IO_N:
227230
propval = vd->vdev_slow_io_n;
228231
break;

module/zfs/zio.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5501,9 +5501,12 @@ zio_done(zio_t *zio)
55015501
zio->io_vd->vdev_stat.vs_slow_ios++;
55025502
mutex_exit(&zio->io_vd->vdev_stat_lock);
55035503

5504-
(void) zfs_ereport_post(FM_EREPORT_ZFS_DELAY,
5505-
zio->io_spa, zio->io_vd, &zio->io_bookmark,
5506-
zio, 0);
5504+
if (zio->io_vd->vdev_slow_io_events) {
5505+
(void) zfs_ereport_post(
5506+
FM_EREPORT_ZFS_DELAY,
5507+
zio->io_spa, zio->io_vd,
5508+
&zio->io_bookmark, zio, 0);
5509+
}
55075510
}
55085511
}
55095512
}

tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ typeset -a properties=(
7171
checksum_t
7272
io_n
7373
io_t
74+
slow_io_events
7475
slow_io_n
7576
slow_io_t
7677
trim_support

tests/zfs-tests/tests/functional/events/zed_slow_io.ksh

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
#
2525
# Copyright (c) 2023, Klara Inc.
26+
# Copyright (c) 2025, Mariusz Zaborski <[email protected]>
2627
#
2728

2829
# DESCRIPTION:
@@ -140,8 +141,8 @@ function slow_io_degrade
140141
{
141142
do_setup
142143

143-
zpool set slow_io_n=5 $TESTPOOL $VDEV
144-
zpool set slow_io_t=60 $TESTPOOL $VDEV
144+
log_must zpool set slow_io_n=5 $TESTPOOL $VDEV
145+
log_must zpool set slow_io_t=60 $TESTPOOL $VDEV
145146

146147
start_slow_io
147148
for i in {1..16}; do
@@ -193,6 +194,44 @@ function slow_io_no_degrade
193194
do_clean
194195
}
195196

197+
# Change slow_io_n, slow_io_t to 5 events in 60 seconds
198+
# fire more than 5 events. Disable slow io events.
199+
# Should not degrade.
200+
function slow_io_degrade_disabled
201+
{
202+
do_setup
203+
204+
log_must zpool set slow_io_n=5 $TESTPOOL $VDEV
205+
log_must zpool set slow_io_t=60 $TESTPOOL $VDEV
206+
log_must zpool set slow_io_events=off $TESTPOOL $VDEV
207+
208+
start_slow_io
209+
for i in {1..16}; do
210+
dd if=${FILEPATH}$i of=/dev/null count=1 bs=512 2>/dev/null
211+
sleep 0.5
212+
done
213+
stop_slow_io
214+
zpool sync
215+
216+
#
217+
# wait 60 seconds to confirm that zfs.delay was not generated.
218+
#
219+
typeset -i i=0
220+
typeset -i events=0
221+
while [[ $i -lt 60 ]]; do
222+
events=$(zpool events | grep "ereport\.fs\.zfs.delay" | wc -l)
223+
i=$((i+1))
224+
sleep 1
225+
done
226+
log_note "$events delay events found"
227+
228+
[ $events -eq "0" ] || \
229+
log_fail "expecting no delay events, found $events"
230+
231+
log_mustnot wait_vdev_state $TESTPOOL $VDEV "DEGRADED" 45
232+
do_clean
233+
}
234+
196235
log_assert "Test ZED slow io configurability"
197236
log_onexit cleanup
198237

@@ -202,5 +241,6 @@ log_must zed_start
202241
default_degrade
203242
slow_io_degrade
204243
slow_io_no_degrade
244+
slow_io_degrade_disabled
205245

206246
log_pass "Test ZED slow io configurability"

0 commit comments

Comments
 (0)