diff --git a/prov/efa/src/efa.h b/prov/efa/src/efa.h index a71b9fde460..f936d8fb084 100644 --- a/prov/efa/src/efa.h +++ b/prov/efa/src/efa.h @@ -112,6 +112,15 @@ */ #define EFA_RDM_BUFPOOL_ALIGNMENT (64) + +/** + * Hard-coded limit on the number of SGEs in one rdma work request, + * imposed by the struct layout of the rdma wqes. + * See `struct efa_io_rdma_req` in efa_io_defs.h + */ +#define EFA_DEVICE_MAX_RDMA_SGE 1 + + struct efa_fabric { struct util_fabric util_fabric; struct fid_fabric *shm_fabric; diff --git a/prov/efa/src/efa_rma.c b/prov/efa/src/efa_rma.c index 792178eeaf5..6a64b91a9a0 100644 --- a/prov/efa/src/efa_rma.c +++ b/prov/efa/src/efa_rma.c @@ -21,6 +21,24 @@ static inline int efa_rma_check_cap(struct efa_base_ep *base_ep) { return -FI_EOPNOTSUPP; } +/** + * @brief Check that the iov count of an rdma work request does not exceed MIN(EFA_DEVICE_MAX_RDMA_SGE, device max_sge_rd); logs a warning when it does + * + * @param base_ep pointer of efa_base_ep; its domain->device->ibv_attr.max_sge_rd is read + * @param count the iov count of the msg + * @return int 0 when count is within the limit, -FI_EOPNOTSUPP when it exceeds it + */ +static inline int efa_rma_check_iov_count(struct efa_base_ep *base_ep, size_t count) +{ + size_t max_sge = MIN(EFA_DEVICE_MAX_RDMA_SGE, base_ep->domain->device->ibv_attr.max_sge_rd); /* effective limit: the smaller of the hard-coded cap and the device attribute */ + + if (OFI_UNLIKELY(count > max_sge)) { + EFA_WARN(FI_LOG_EP_DATA, "EFA device currently doesn't support > %zu iov for rdma work request\n", max_sge); + return -FI_EOPNOTSUPP; + } + return 0; +} + /* * efa_rma_post_read() will post a read request.
* @@ -56,8 +74,6 @@ static inline ssize_t efa_rma_post_read(struct efa_base_ep *base_ep, ofi_total_iov_len(msg->msg_iov, msg->iov_count), msg->addr, (size_t) msg->context, flags); - assert(msg->iov_count > 0 && - msg->iov_count <= base_ep->domain->info->tx_attr->iov_limit); assert(msg->rma_iov_count > 0 && msg->rma_iov_count <= base_ep->domain->info->tx_attr->rma_iov_limit); assert(ofi_total_iov_len(msg->msg_iov, msg->iov_count) <= @@ -124,6 +140,10 @@ ssize_t efa_rma_readmsg(struct fid_ep *ep_fid, const struct fi_msg_rma *msg, uin if (err) return err; + err = efa_rma_check_iov_count(base_ep, msg->iov_count); + if (err) + return err; + return efa_rma_post_read(base_ep, msg, flags | base_ep->util_ep.tx_msg_flags); } @@ -143,6 +163,10 @@ ssize_t efa_rma_readv(struct fid_ep *ep_fid, const struct iovec *iov, void **des if (err) return err; + err = efa_rma_check_iov_count(base_ep, iov_count); + if (err) + return err; + len = ofi_total_iov_len(iov, iov_count); EFA_SETUP_RMA_IOV(rma_iov, addr, len, key); EFA_SETUP_MSG_RMA(msg, iov, desc, iov_count, src_addr, &rma_iov, 1, @@ -277,6 +301,10 @@ ssize_t efa_rma_writemsg(struct fid_ep *ep_fid, const struct fi_msg_rma *msg, if (err) return err; + err = efa_rma_check_iov_count(base_ep, msg->iov_count); + if (err) + return err; + return efa_rma_post_write(base_ep, msg, flags | base_ep->util_ep.tx_msg_flags); } @@ -295,6 +323,10 @@ ssize_t efa_rma_writev(struct fid_ep *ep_fid, const struct iovec *iov, if (err) return err; + err = efa_rma_check_iov_count(base_ep, iov_count); + if (err) + return err; + len = ofi_total_iov_len(iov, iov_count); EFA_SETUP_RMA_IOV(rma_iov, addr, len, key); EFA_SETUP_MSG_RMA(msg, iov, desc, iov_count, dest_addr, &rma_iov, 1, diff --git a/prov/efa/test/efa_unit_test_rma.c b/prov/efa/test/efa_unit_test_rma.c index f827b723fde..ec2d9295243 100644 --- a/prov/efa/test/efa_unit_test_rma.c +++ b/prov/efa/test/efa_unit_test_rma.c @@ -19,6 +19,8 @@ static void test_efa_rma_prep(struct efa_resource *resource, 
fi_addr_t *addr) base_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); /* Add rma caps explicitly to ep->info to allow local test */ base_ep->info->caps |= FI_RMA; + /* Mock the device max_sge_rd as EFA_DEVICE_MAX_RDMA_SGE to allow local test */ + base_ep->domain->device->ibv_attr.max_sge_rd = EFA_DEVICE_MAX_RDMA_SGE; /* Set up the mock operations */ g_efa_unit_test_mocks.efa_qp_post_recv = &efa_mock_efa_qp_post_recv_return_mock; g_efa_unit_test_mocks.efa_qp_wr_complete = &efa_mock_efa_qp_wr_complete_no_op; @@ -299,3 +301,63 @@ void test_efa_rma_writemsg_with_inject(struct efa_resource **state) efa_unit_test_buff_destruct(&local_buff); } +/* Verify that fi_readv() with 2 iovs fails with -FI_EOPNOTSUPP */ +void test_efa_rma_readv_multiple_iov_fail(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + struct efa_unit_test_buff local_buff1, local_buff2; + struct iovec iov[2]; /* 2 iovs, while EFA_DEVICE_MAX_RDMA_SGE is 1 */ + fi_addr_t src_addr; + void *desc[2]; + int ret; + uint64_t remote_addr = 0x87654321; + uint64_t remote_key = 123456; + + test_efa_rma_prep(resource, &src_addr); + efa_unit_test_buff_construct(&local_buff1, resource, 2048); + efa_unit_test_buff_construct(&local_buff2, resource, 2048); + + iov[0].iov_base = local_buff1.buff; + iov[0].iov_len = local_buff1.size; + iov[1].iov_base = local_buff2.buff; + iov[1].iov_len = local_buff2.size; + desc[0] = fi_mr_desc(local_buff1.mr); + desc[1] = fi_mr_desc(local_buff2.mr); + + ret = fi_readv(resource->ep, iov, desc, 2, src_addr, remote_addr, + remote_key, NULL); + assert_int_equal(ret, -FI_EOPNOTSUPP); /* the 2-iov read is expected to be rejected */ + + efa_unit_test_buff_destruct(&local_buff1); + efa_unit_test_buff_destruct(&local_buff2); +} +/* Verify that fi_writev() with 2 iovs fails with -FI_EOPNOTSUPP */ +void test_efa_rma_writev_multiple_iov_fail(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + struct efa_unit_test_buff local_buff1, local_buff2; + struct iovec iov[2]; /* 2 iovs, while EFA_DEVICE_MAX_RDMA_SGE is 1 */ + fi_addr_t dest_addr; + void *desc[2]; + int ret; + uint64_t remote_addr = 0x87654321; + uint64_t remote_key = 123456; + + test_efa_rma_prep(resource, &dest_addr); + efa_unit_test_buff_construct(&local_buff1, resource, 2048);
+ efa_unit_test_buff_construct(&local_buff2, resource, 2048); + + iov[0].iov_base = local_buff1.buff; + iov[0].iov_len = local_buff1.size; + iov[1].iov_base = local_buff2.buff; + iov[1].iov_len = local_buff2.size; + desc[0] = fi_mr_desc(local_buff1.mr); + desc[1] = fi_mr_desc(local_buff2.mr); + + ret = fi_writev(resource->ep, iov, desc, 2, dest_addr, remote_addr, + remote_key, NULL); + assert_int_equal(ret, -FI_EOPNOTSUPP); /* the 2-iov write is expected to be rejected */ + + efa_unit_test_buff_destruct(&local_buff1); + efa_unit_test_buff_destruct(&local_buff2); +} diff --git a/prov/efa/test/efa_unit_tests.c b/prov/efa/test/efa_unit_tests.c index 70e0792fcb1..3e0150086aa 100644 --- a/prov/efa/test/efa_unit_tests.c +++ b/prov/efa/test/efa_unit_tests.c @@ -370,6 +370,8 @@ int main(void) cmocka_unit_test_setup_teardown(test_efa_rma_inject_write, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rma_inject_writedata, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rma_writemsg_with_inject, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rma_readv_multiple_iov_fail, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rma_writev_multiple_iov_fail, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_cq_read_no_completion, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_cq_read_send_success, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_cq_read_senddata_success, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), diff --git a/prov/efa/test/efa_unit_tests.h b/prov/efa/test/efa_unit_tests.h index 4ff878fd884..15246c4c466 100644 --- a/prov/efa/test/efa_unit_tests.h +++ b/prov/efa/test/efa_unit_tests.h @@ -332,6 +332,8 @@ void test_efa_rma_writedata(); void
test_efa_rma_inject_write(); void test_efa_rma_inject_writedata(); void test_efa_rma_writemsg_with_inject(); +void test_efa_rma_readv_multiple_iov_fail(); +void test_efa_rma_writev_multiple_iov_fail(); void test_efa_cq_read_no_completion(); void test_efa_cq_read_send_success(); void test_efa_cq_read_senddata_success();