Skip to content

Commit c610704

Browse files
authored
tcp/context: use mutex to avoid race condition when multiple operations are running
Differential Revision: D72599396
Pull Request resolved: #427
1 parent 0799996 commit c610704

File tree

2 files changed

+7
-0
lines changed

2 files changed

+7
-0
lines changed

gloo/transport/tcp/context.cc

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -177,6 +177,8 @@ std::unique_ptr<transport::Pair>& Context::getPair(int rank) {
177177
return pair;
178178
}
179179

180+
std::lock_guard<std::mutex> lock(m_);
181+
180182
if (!connecting_[rank]) {
181183
connecting_[rank] = true;
182184

@@ -187,6 +189,7 @@ std::unique_ptr<transport::Pair>& Context::getPair(int rank) {
187189

188190
auto remoteDeviceAddr = Address(remoteRankInfo.addressBytes).getSockaddr();
189191
auto remoteAddr = Address(remoteDeviceAddr, this->rank);
192+
// Actual connection happens asynchronously.
190193
pair->connect(remoteAddr.bytes());
191194
}
192195
return pair;

gloo/transport/tcp/context.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@ class Context : public ::gloo::transport::Context,
5252
protected:
5353
std::shared_ptr<Device> device_;
5454
std::shared_ptr<IStore> store_{nullptr};
55+
56+
// Protects the connection states to avoid race conditions.
57+
std::mutex m_;
58+
// Whether or not connection has been started for this peer.
5559
std::vector<bool> connecting_;
5660

5761
using pendingRecvTuple = std::tuple<

0 commit comments

Comments
 (0)