Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions prov/efa/src/efa.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,15 @@
*/
#define EFA_RDM_BUFPOOL_ALIGNMENT (64)


/**
 * Hard-coded upper bound on the number of scatter-gather entries (SGEs)
 * in a single RDMA work request. The bound is imposed by the layout of
 * the RDMA WQE structs; see `struct efa_io_rdma_req` in efa_io_defs.h.
 */
#define EFA_DEVICE_MAX_RDMA_SGE 1


struct efa_fabric {
struct util_fabric util_fabric;
struct fid_fabric *shm_fabric;
Expand Down
36 changes: 34 additions & 2 deletions prov/efa/src/efa_rma.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,24 @@ static inline int efa_rma_check_cap(struct efa_base_ep *base_ep) {
return -FI_EOPNOTSUPP;
}

/**
 * @brief Validate the local iov count of an rdma work request
 *
 * The effective limit is the smaller of the hard-coded
 * EFA_DEVICE_MAX_RDMA_SGE and the max_sge_rd value stored in the
 * device's ibv_attr. A warning is logged when the limit is exceeded.
 *
 * @param base_ep pointer of efa_base_ep whose domain/device is consulted
 * @param count the iov count of the msg
 * @return int 0 when count is within the limit, -FI_EOPNOTSUPP otherwise
 */
static inline int efa_rma_check_iov_count(struct efa_base_ep *base_ep, size_t count)
{
	const size_t sge_limit = MIN(EFA_DEVICE_MAX_RDMA_SGE,
				     base_ep->domain->device->ibv_attr.max_sge_rd);
	int ret = 0;

	/* Exceeding the limit is the unexpected path on the data path. */
	if (OFI_UNLIKELY(count > sge_limit)) {
		EFA_WARN(FI_LOG_EP_DATA, "EFA device currently doesn't support > %zu iov for rdma work request\n", sge_limit);
		ret = -FI_EOPNOTSUPP;
	}

	return ret;
}

/*
* efa_rma_post_read() will post a read request.
*
Expand Down Expand Up @@ -56,8 +74,6 @@ static inline ssize_t efa_rma_post_read(struct efa_base_ep *base_ep,
ofi_total_iov_len(msg->msg_iov, msg->iov_count),
msg->addr, (size_t) msg->context, flags);

assert(msg->iov_count > 0 &&
msg->iov_count <= base_ep->domain->info->tx_attr->iov_limit);
assert(msg->rma_iov_count > 0 &&
msg->rma_iov_count <= base_ep->domain->info->tx_attr->rma_iov_limit);
assert(ofi_total_iov_len(msg->msg_iov, msg->iov_count) <=
Expand Down Expand Up @@ -124,6 +140,10 @@ ssize_t efa_rma_readmsg(struct fid_ep *ep_fid, const struct fi_msg_rma *msg, uin
if (err)
return err;

err = efa_rma_check_iov_count(base_ep, msg->iov_count);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should be called from efa_data_path_direct_wr_set_sge_list or adjacent to the calls to this function since 1) it's directly related to WQE format, 2) we should avoid enforcing Libfabric hardcoded limits when using rdma-core data path.

if (err)
return err;

return efa_rma_post_read(base_ep, msg, flags | base_ep->util_ep.tx_msg_flags);
}

Expand All @@ -143,6 +163,10 @@ ssize_t efa_rma_readv(struct fid_ep *ep_fid, const struct iovec *iov, void **des
if (err)
return err;

err = efa_rma_check_iov_count(base_ep, iov_count);
if (err)
return err;

len = ofi_total_iov_len(iov, iov_count);
EFA_SETUP_RMA_IOV(rma_iov, addr, len, key);
EFA_SETUP_MSG_RMA(msg, iov, desc, iov_count, src_addr, &rma_iov, 1,
Expand Down Expand Up @@ -277,6 +301,10 @@ ssize_t efa_rma_writemsg(struct fid_ep *ep_fid, const struct fi_msg_rma *msg,
if (err)
return err;

err = efa_rma_check_iov_count(base_ep, msg->iov_count);
if (err)
return err;

return efa_rma_post_write(base_ep, msg, flags | base_ep->util_ep.tx_msg_flags);
}

Expand All @@ -295,6 +323,10 @@ ssize_t efa_rma_writev(struct fid_ep *ep_fid, const struct iovec *iov,
if (err)
return err;

err = efa_rma_check_iov_count(base_ep, iov_count);
if (err)
return err;

len = ofi_total_iov_len(iov, iov_count);
EFA_SETUP_RMA_IOV(rma_iov, addr, len, key);
EFA_SETUP_MSG_RMA(msg, iov, desc, iov_count, dest_addr, &rma_iov, 1,
Expand Down
62 changes: 62 additions & 0 deletions prov/efa/test/efa_unit_test_rma.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ static void test_efa_rma_prep(struct efa_resource *resource, fi_addr_t *addr)
base_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid);
/* Add rma caps explicitly to ep->info to allow local test */
base_ep->info->caps |= FI_RMA;
/* Mock the max rdma sge to allow local test */
base_ep->domain->device->ibv_attr.max_sge_rd = EFA_DEVICE_MAX_RDMA_SGE;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you really want to test your minimum, you should put a larger value

/* Set up the mock operations */
g_efa_unit_test_mocks.efa_qp_post_recv = &efa_mock_efa_qp_post_recv_return_mock;
g_efa_unit_test_mocks.efa_qp_wr_complete = &efa_mock_efa_qp_wr_complete_no_op;
Expand Down Expand Up @@ -299,3 +301,63 @@ void test_efa_rma_writemsg_with_inject(struct efa_resource **state)

efa_unit_test_buff_destruct(&local_buff);
}

void test_efa_rma_readv_multiple_iov_fail(struct efa_resource **state)
{
struct efa_resource *resource = *state;
struct efa_unit_test_buff local_buff1, local_buff2;
struct iovec iov[2];
fi_addr_t src_addr;
void *desc[2];
int ret;
uint64_t remote_addr = 0x87654321;
uint64_t remote_key = 123456;

test_efa_rma_prep(resource, &src_addr);
efa_unit_test_buff_construct(&local_buff1, resource, 2048);
efa_unit_test_buff_construct(&local_buff2, resource, 2048);

iov[0].iov_base = local_buff1.buff;
iov[0].iov_len = local_buff1.size;
iov[1].iov_base = local_buff2.buff;
iov[1].iov_len = local_buff2.size;
desc[0] = fi_mr_desc(local_buff1.mr);
desc[1] = fi_mr_desc(local_buff2.mr);

ret = fi_readv(resource->ep, iov, desc, 2, src_addr, remote_addr,
remote_key, NULL);
assert_int_equal(ret, -FI_EOPNOTSUPP);

efa_unit_test_buff_destruct(&local_buff1);
efa_unit_test_buff_destruct(&local_buff2);
}

void test_efa_rma_writev_multiple_iov_fail(struct efa_resource **state)
{
struct efa_resource *resource = *state;
struct efa_unit_test_buff local_buff1, local_buff2;
struct iovec iov[2];
fi_addr_t dest_addr;
void *desc[2];
int ret;
uint64_t remote_addr = 0x87654321;
uint64_t remote_key = 123456;

test_efa_rma_prep(resource, &dest_addr);
efa_unit_test_buff_construct(&local_buff1, resource, 2048);
efa_unit_test_buff_construct(&local_buff2, resource, 2048);

iov[0].iov_base = local_buff1.buff;
iov[0].iov_len = local_buff1.size;
iov[1].iov_base = local_buff2.buff;
iov[1].iov_len = local_buff2.size;
desc[0] = fi_mr_desc(local_buff1.mr);
desc[1] = fi_mr_desc(local_buff2.mr);

ret = fi_writev(resource->ep, iov, desc, 2, dest_addr, remote_addr,
remote_key, NULL);
assert_int_equal(ret, -FI_EOPNOTSUPP);

efa_unit_test_buff_destruct(&local_buff1);
efa_unit_test_buff_destruct(&local_buff2);
}
2 changes: 2 additions & 0 deletions prov/efa/test/efa_unit_tests.c
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,8 @@ int main(void)
cmocka_unit_test_setup_teardown(test_efa_rma_inject_write, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rma_inject_writedata, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rma_writemsg_with_inject, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rma_readv_multiple_iov_fail, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rma_writev_multiple_iov_fail, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_cq_read_no_completion, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_cq_read_send_success, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_cq_read_senddata_success, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
Expand Down
2 changes: 2 additions & 0 deletions prov/efa/test/efa_unit_tests.h
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,8 @@ void test_efa_rma_writedata();
void test_efa_rma_inject_write();
void test_efa_rma_inject_writedata();
void test_efa_rma_writemsg_with_inject();
void test_efa_rma_readv_multiple_iov_fail();
void test_efa_rma_writev_multiple_iov_fail();
void test_efa_cq_read_no_completion();
void test_efa_cq_read_send_success();
void test_efa_cq_read_senddata_success();
Expand Down