Skip to content

Commit 493893b

Browse files
author
Mike Christie
committed
runner: fix state_lock and cfgfs use
The kernel can end up taking a configfs lock and then calling up to userspace, so we must not have a lock that is taken both in this upcall and when interacting with configfs. As reported by sherlockxiao in #595, this happens with the state_lock: during deletion the kernel holds the configfs lock while the upcall takes the state_lock, but some code paths hold the state_lock while calling into configfs. This moves our configfs access out of the state_lock.
1 parent 7a6186b commit 493893b

File tree

4 files changed

+32
-18
lines changed

4 files changed

+32
-18
lines changed

target.c

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
static struct list_head tpg_recovery_list = LIST_HEAD_INIT(tpg_recovery_list);
2929
/*
3030
* Locking ordering:
31-
* rdev->state_lock
31+
* rdev->state_lock (see tcmur_device.h for more state_lock restrictions)
3232
* tpg_recovery_lock
3333
*/
3434
static pthread_mutex_t tpg_recovery_lock = PTHREAD_MUTEX_INITIALIZER;
@@ -276,32 +276,23 @@ static void *tgt_port_grp_recovery_thread_fn(void *arg)
276276
return NULL;
277277
}
278278

279-
int tcmu_add_dev_to_recovery_list(struct tcmu_device *dev)
279+
int tcmu_add_dev_to_recovery_list(struct tcmu_device *dev,
280+
struct list_head *alua_list)
280281
{
281282
struct tcmur_device *rdev = tcmu_dev_get_private(dev);
282-
struct list_head alua_list;
283283
struct alua_grp *group;
284284
struct tgt_port_grp *tpg;
285285
struct tgt_port *port, *enabled_port = NULL;
286286
int ret;
287287

288288
pthread_mutex_lock(&tpg_recovery_lock);
289-
290-
list_head_init(&alua_list);
291-
ret = tcmu_get_alua_grps(dev, &alua_list);
292-
if (ret) {
293-
/* User is deleting device so fast fail */
294-
tcmu_dev_warn(dev, "Could not find any tpgs.\n");
295-
goto done;
296-
}
297-
298289
/*
299290
* This assumes a tcmu_dev is only exported through one local
300291
* enabled tpg. The kernel members file only returns
301292
* the one and runner is not passed info about which
302293
* tpg/port IO was received on.
303294
*/
304-
list_for_each(&alua_list, group, entry) {
295+
list_for_each(alua_list, group, entry) {
305296
list_for_each(&group->tgt_ports, port, entry) {
306297
if (port->enabled)
307298
enabled_port = port;
@@ -340,7 +331,6 @@ int tcmu_add_dev_to_recovery_list(struct tcmu_device *dev)
340331
add_to_list:
341332
list_add(&tpg->devs, &rdev->recovery_entry);
342333
done:
343-
tcmu_release_alua_grps(&alua_list);
344334
pthread_mutex_unlock(&tpg_recovery_lock);
345335
return ret;
346336
}

target.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "ccan/list/list.h"
1313

1414
struct tgt_port_grp;
15+
struct list_head;
1516

1617
struct tgt_port {
1718
uint16_t rel_port_id;
@@ -31,6 +32,7 @@ struct tgt_port {
3132

3233
void tcmu_free_tgt_port(struct tgt_port *port);
3334
struct tgt_port *tcmu_get_tgt_port(char *member_str);
34-
int tcmu_add_dev_to_recovery_list(struct tcmu_device *dev);
35+
int tcmu_add_dev_to_recovery_list(struct tcmu_device *dev,
36+
struct list_head *alua_list);
3537

3638
#endif

tcmur_device.c

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,16 @@ void tcmu_cancel_recovery(struct tcmu_device *dev)
179179
void tcmu_notify_conn_lost(struct tcmu_device *dev)
180180
{
181181
struct tcmur_device *rdev = tcmu_dev_get_private(dev);
182+
struct list_head alua_list;
183+
int ret;
184+
185+
list_head_init(&alua_list);
186+
ret = tcmu_get_alua_grps(dev, &alua_list);
187+
if (ret) {
188+
/* User is deleting device so fast fail */
189+
tcmu_dev_warn(dev, "Could not find any tpgs.\n");
190+
return;
191+
}
182192

183193
pthread_mutex_lock(&rdev->state_lock);
184194

@@ -200,10 +210,11 @@ void tcmu_notify_conn_lost(struct tcmu_device *dev)
200210
tcmu_dev_err(dev, "Handler connection lost (lock state %d)\n",
201211
rdev->lock_state);
202212

203-
if (!tcmu_add_dev_to_recovery_list(dev))
213+
if (!tcmu_add_dev_to_recovery_list(dev, &alua_list))
204214
rdev->flags |= TCMUR_DEV_FLAG_IN_RECOVERY;
205215
unlock:
206216
pthread_mutex_unlock(&rdev->state_lock);
217+
tcmu_release_alua_grps(&alua_list);
207218
}
208219

209220
/**
@@ -439,11 +450,11 @@ int tcmu_acquire_dev_lock(struct tcmu_device *dev, uint16_t tag)
439450

440451
tcmu_dev_info(dev, "Lock acquisition %s\n",
441452
rdev->lock == TCMUR_DEV_LOCK_LOCKED ? "successful" : "unsuccessful");
442-
tcmu_cfgfs_dev_exec_action(dev, "block_dev", 0);
443453

444454
pthread_cond_signal(&rdev->lock_cond);
445455
pthread_mutex_unlock(&rdev->state_lock);
446456

457+
tcmu_cfgfs_dev_exec_action(dev, "block_dev", 0);
447458
return ret;
448459
}
449460

tcmur_device.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,18 @@ struct tcmur_device {
5656
pthread_t lock_thread;
5757
pthread_cond_t lock_cond;
5858

59-
/* General lock for lock state, thread, dev state, etc */
59+
/*
60+
* General lock for lock state, thread, dev state, etc.
61+
*
62+
* Locking order:
63+
* 1. Kernel configfs lock
64+
* 2. state_lock
65+
* 3. tpg_recovery_lock
66+
*
67+
* On deletion the kernel will grab the configfs lock then call into
68+
* userspace, so we must not hold the state_lock then perform a configfs
69+
* operation.
70+
* */
6071
pthread_mutex_t state_lock;
6172
int pending_uas;
6273

0 commit comments

Comments
 (0)