Skip to content

Commit 59e87bf

Browse files
committed
prov/efa: Fail early in rma when iov count exceeds device support
efa-direct offloads RMA requests directly to RDMA work queue entries (WQEs) on the EFA device. However, the EFA device currently does not support more than one iov per RDMA work request (the limit is reported as ibv_device_attr.max_sge_rd). This patch makes efa_rma_post_read and efa_rma_post_write fail early with a clear error message when the iov count exceeds that limit, instead of crashing unexpectedly. Signed-off-by: Shi Jin <[email protected]>
1 parent 46d451b commit 59e87bf

File tree

4 files changed

+73
-2
lines changed

4 files changed

+73
-2
lines changed

prov/efa/src/efa_rma.c

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,10 @@ static inline ssize_t efa_rma_post_read(struct efa_base_ep *base_ep,
5656
ofi_total_iov_len(msg->msg_iov, msg->iov_count),
5757
msg->addr, (size_t) msg->context, flags);
5858

59-
assert(msg->iov_count > 0 &&
60-
msg->iov_count <= base_ep->domain->info->tx_attr->iov_limit);
59+
if (OFI_UNLIKELY(msg->iov_count > base_ep->domain->device->ibv_attr.max_sge_rd)) {
60+
EFA_WARN(FI_LOG_EP_DATA, "EFA device currently doesn't support > %d iov for rdma work request\n", base_ep->domain->device->ibv_attr.max_sge_rd);
61+
return -FI_EINVAL;
62+
}
6163
assert(msg->rma_iov_count > 0 &&
6264
msg->rma_iov_count <= base_ep->domain->info->tx_attr->rma_iov_limit);
6365
assert(ofi_total_iov_len(msg->msg_iov, msg->iov_count) <=
@@ -207,6 +209,11 @@ static inline ssize_t efa_rma_post_write(struct efa_base_ep *base_ep,
207209
return -FI_ENOSYS;
208210
}
209211

212+
if (OFI_UNLIKELY(msg->iov_count > base_ep->domain->device->ibv_attr.max_sge_rd)) {
213+
EFA_WARN(FI_LOG_EP_DATA, "EFA device currently doesn't support > %d iov for rdma work request\n", base_ep->domain->device->ibv_attr.max_sge_rd);
214+
return -FI_EINVAL;
215+
}
216+
210217
efa_tracepoint(write_begin_msg_context, (size_t) msg->context, (size_t) msg->addr);
211218

212219
EFA_DBG(FI_LOG_EP_DATA,

prov/efa/test/efa_unit_test_rma.c

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,3 +299,63 @@ void test_efa_rma_writemsg_with_inject(struct efa_resource **state)
299299

300300
efa_unit_test_buff_destruct(&local_buff);
301301
}
302+
303+
void test_efa_rma_readv_multiple_iov_einval(struct efa_resource **state)
304+
{
305+
struct efa_resource *resource = *state;
306+
struct efa_unit_test_buff local_buff1, local_buff2;
307+
struct iovec iov[2];
308+
fi_addr_t src_addr;
309+
void *desc[2];
310+
int ret;
311+
uint64_t remote_addr = 0x87654321;
312+
uint64_t remote_key = 123456;
313+
314+
test_efa_rma_prep(resource, &src_addr);
315+
efa_unit_test_buff_construct(&local_buff1, resource, 2048);
316+
efa_unit_test_buff_construct(&local_buff2, resource, 2048);
317+
318+
iov[0].iov_base = local_buff1.buff;
319+
iov[0].iov_len = local_buff1.size;
320+
iov[1].iov_base = local_buff2.buff;
321+
iov[1].iov_len = local_buff2.size;
322+
desc[0] = fi_mr_desc(local_buff1.mr);
323+
desc[1] = fi_mr_desc(local_buff2.mr);
324+
325+
ret = fi_readv(resource->ep, iov, desc, 2, src_addr, remote_addr,
326+
remote_key, NULL);
327+
assert_int_equal(ret, -FI_EINVAL);
328+
329+
efa_unit_test_buff_destruct(&local_buff1);
330+
efa_unit_test_buff_destruct(&local_buff2);
331+
}
332+
333+
void test_efa_rma_writev_multiple_iov_einval(struct efa_resource **state)
334+
{
335+
struct efa_resource *resource = *state;
336+
struct efa_unit_test_buff local_buff1, local_buff2;
337+
struct iovec iov[2];
338+
fi_addr_t dest_addr;
339+
void *desc[2];
340+
int ret;
341+
uint64_t remote_addr = 0x87654321;
342+
uint64_t remote_key = 123456;
343+
344+
test_efa_rma_prep(resource, &dest_addr);
345+
efa_unit_test_buff_construct(&local_buff1, resource, 2048);
346+
efa_unit_test_buff_construct(&local_buff2, resource, 2048);
347+
348+
iov[0].iov_base = local_buff1.buff;
349+
iov[0].iov_len = local_buff1.size;
350+
iov[1].iov_base = local_buff2.buff;
351+
iov[1].iov_len = local_buff2.size;
352+
desc[0] = fi_mr_desc(local_buff1.mr);
353+
desc[1] = fi_mr_desc(local_buff2.mr);
354+
355+
ret = fi_writev(resource->ep, iov, desc, 2, dest_addr, remote_addr,
356+
remote_key, NULL);
357+
assert_int_equal(ret, -FI_EINVAL);
358+
359+
efa_unit_test_buff_destruct(&local_buff1);
360+
efa_unit_test_buff_destruct(&local_buff2);
361+
}

prov/efa/test/efa_unit_tests.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,8 @@ int main(void)
370370
cmocka_unit_test_setup_teardown(test_efa_rma_inject_write, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
371371
cmocka_unit_test_setup_teardown(test_efa_rma_inject_writedata, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
372372
cmocka_unit_test_setup_teardown(test_efa_rma_writemsg_with_inject, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
373+
cmocka_unit_test_setup_teardown(test_efa_rma_readv_multiple_iov_einval, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
374+
cmocka_unit_test_setup_teardown(test_efa_rma_writev_multiple_iov_einval, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
373375
cmocka_unit_test_setup_teardown(test_efa_cq_read_no_completion, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
374376
cmocka_unit_test_setup_teardown(test_efa_cq_read_send_success, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
375377
cmocka_unit_test_setup_teardown(test_efa_cq_read_senddata_success, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),

prov/efa/test/efa_unit_tests.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,8 @@ void test_efa_rma_writedata();
332332
void test_efa_rma_inject_write();
333333
void test_efa_rma_inject_writedata();
334334
void test_efa_rma_writemsg_with_inject();
335+
void test_efa_rma_readv_multiple_iov_einval();
336+
void test_efa_rma_writev_multiple_iov_einval();
335337
void test_efa_cq_read_no_completion();
336338
void test_efa_cq_read_send_success();
337339
void test_efa_cq_read_senddata_success();

0 commit comments

Comments
 (0)