Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions compile_commands.json
5 changes: 4 additions & 1 deletion csrc/engine/rdma/memory_pool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@ int RDMAMemoryPool::register_memory_region(const std::string& mr_key, uintptr_t
/* MemoryRegion Access Right = 777 */
const static int access_rights = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ;
ibv_mr* mr = ibv_reg_mr(pd_, (void*)data_ptr, length, access_rights);

// std::cout << "length: " << length << std::endl;
// std::cout << "data_ptr " << data_ptr << std::endl;
// std::cout << "mr_key " << mr_key << std::endl;
// std::cout << "mr:" << mr << std::endl;
SLIME_ASSERT(mr, " Failed to register memory " << data_ptr);

SLIME_LOG_DEBUG("Memory region: " << mr_key << ", " << (void*)data_ptr << " -- " << (void*)(data_ptr + length)
Expand Down
4 changes: 2 additions & 2 deletions csrc/engine/rdma/memory_pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ typedef struct remote_mr {
remote_mr(uintptr_t addr, size_t length, uint32_t rkey): addr(addr), length(length), rkey(rkey) {}

uintptr_t addr{(uintptr_t) nullptr};
size_t length{};
uint32_t rkey{};
size_t length{0};
uint32_t rkey{0};
} remote_mr_t;

class RDMAMemoryPool {
Expand Down
3 changes: 2 additions & 1 deletion csrc/engine/rdma/rdma_assignment.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,9 @@ typedef struct callback_info {
callback_info() = default;
callback_info(OpCode opcode, size_t batch_size, callback_fn_t callback): opcode_(opcode), batch_size_(batch_size)
{
if (callback)
if (callback) {
callback_ = std::move(callback);
}
}

callback_fn_t callback_{[this](int code, int imm_data) {
Expand Down
9 changes: 5 additions & 4 deletions csrc/engine/rdma/rdma_buffer.cpp
Original file line number Diff line number Diff line change
@@ -1,26 +1,27 @@

#include "engine/rdma/rdma_buffer.h"
#include "engine/assignment.h"

namespace slime {

void RDMABuffer::send()
{
endpoint_->addSendTask(shared_from_this());
endpoint_->addRDMABuffer(OpCode::SEND, shared_from_this());
}

void RDMABuffer::recv()
{
endpoint_->addRecvTask(shared_from_this());
endpoint_->addRDMABuffer(OpCode::RECV, shared_from_this());
}

void RDMABuffer::send_done_callback()
void RDMABuffer::sendDoneCallback()
{
std::unique_lock<std::mutex> lock(send_mutex_);
++send_completed_;
send_cv_.notify_all();
}

void RDMABuffer::recv_done_callback()
void RDMABuffer::recvDoneCallback()
{
std::unique_lock<std::mutex> lock(recv_mutex_);
++recv_completed_;
Expand Down
67 changes: 39 additions & 28 deletions csrc/engine/rdma/rdma_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "engine/rdma/rdma_endpoint.h"

#include <condition_variable>
#include <cstdint>
#include <infiniband/verbs.h>
#include <map>
#include <memory>
Expand All @@ -13,6 +14,7 @@
#include <unordered_map>
#include <vector>

#include "logging.h"
#include "rdma_common.h"

namespace slime {
Expand All @@ -23,36 +25,43 @@ class RDMABuffer: public std::enable_shared_from_this<RDMABuffer> {
friend class RDMAEndpoint;

public:
RDMABuffer(std::shared_ptr<RDMAEndpoint> endpoint, storage_view_batch_t& batch):
endpoint_(endpoint), storage_view_batch_(std::move(batch))
RDMABuffer(std::shared_ptr<RDMAEndpoint> endpoint, uintptr_t ptr, size_t offset, size_t data_size):
endpoint_(endpoint), ptr_(ptr), offset_(offset), data_size_(data_size)
{
SLIME_LOG_DEBUG("New RDMABuffer and the Index of Current Buffer is: ", buffer_counter_);
buffer_counter_ += 1;
}

RDMABuffer(std::shared_ptr<RDMAEndpoint> endpoint,
std::vector<uintptr_t> ptrs,
std::vector<size_t> offset,
std::vector<size_t> data_size)
RDMABuffer(std::shared_ptr<RDMAEndpoint> endpoint, storage_view_batch_t& batch):
endpoint_(endpoint), storage_view_batch_(std::move(batch))
{

batch_size_ = ptrs.size();
ptrs_ = ptrs;
offset_ = offset;
data_size_ = data_size;
for (uint32_t i = 0; i < batch_size_; ++i) {
storage_view_t view{.data_ptr = ptrs[i], .storage_offset = offset[i], .length = data_size[i]};
storage_view_batch_.push_back(view);
}
endpoint_ = endpoint;
}

// RDMABuffer(std::shared_ptr<RDMAEndpoint> endpoint,
// std::vector<uintptr_t> ptrs,
// std::vector<size_t> offset,
// std::vector<size_t> data_size)
// {

// batch_size_ = ptrs.size();
// ptrs_ = ptrs;
// offset_ = offset;
// data_size_ = data_size;
// for (uint32_t i = 0; i < batch_size_; ++i) {
// storage_view_t view{.data_ptr = ptrs[i], .storage_offset = offset[i], .length = data_size[i]};
// storage_view_batch_.push_back(view);
// }
// endpoint_ = endpoint;
// }

~RDMABuffer() = default;

const size_t batchSize()
const size_t batch_size()
{
return storage_view_batch_.size();
}

const storage_view_batch_t& storageViewBatch()
const storage_view_batch_t& view_batch()
{
return storage_view_batch_;
}
Expand All @@ -63,20 +72,20 @@ class RDMABuffer: public std::enable_shared_from_this<RDMABuffer> {
bool waitSend();
bool waitRecv();

void send_done_callback();
void recv_done_callback();

void get_time();
void sendDoneCallback();
void recvDoneCallback();

private:
storage_view_batch_t storage_view_batch_;

std::shared_ptr<RDMAEndpoint> endpoint_;
std::vector<uintptr_t> ptrs_;
std::vector<size_t> offset_;
std::vector<size_t> data_size_;
uintptr_t ptr_;
size_t offset_;
size_t data_size_;

size_t batch_size_;
std::vector<uintptr_t> ptrs_batch_;
std::vector<size_t> offset_batch_;
std::vector<size_t> data_size_batch_;

storage_view_batch_t storage_view_batch_;

std::atomic<int> send_pending_{0};
std::atomic<int> recv_pending_{0};
Expand All @@ -89,6 +98,8 @@ class RDMABuffer: public std::enable_shared_from_this<RDMABuffer> {

std::mutex send_mutex_;
std::mutex recv_mutex_;

static inline size_t buffer_counter_{0};
};

} // namespace slime
2 changes: 0 additions & 2 deletions csrc/engine/rdma/rdma_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,6 @@ void split_assign_by_max_length(OpCode opcode,
AssignmentBatch& batch_split_after_max_length,
size_t max_length)
{
// split assignment by length
for (size_t i = 0; i < batch.size(); ++i) {
if (batch[i].length < max_length) {
batch_split_after_max_length.push_back(std::move(batch[i]));
Expand Down Expand Up @@ -434,7 +433,6 @@ RDMAContext::submit(OpCode opcode, AssignmentBatch& batch, callback_fn_t callbac
size_t length = SLIME_MAX_LENGTH_PER_ASSIGNMENT;
AssignmentBatch batch_split;
split_assign_by_max_length(opcode, batch, batch_split, length);

AssignmentBatch batch_after_agg_qp;
while (batch_split.size() < SLIME_AGG_QP_NUM) {
length = length / 2;
Expand Down
Loading