From 925551cac25ee5e6e3cbbc54e57e4ada5b7c5934 Mon Sep 17 00:00:00 2001 From: Sunita Nadampalli Date: Tue, 23 Sep 2025 14:25:13 +0000 Subject: [PATCH] prov/shm: make smr buffer pool allocations cache line aligned Fixed the smr buffer pool alignment to match the cache line size on x86 and aarch64 CPUs. This has improved p99 latencies and throughput for NCCL collective tests. Signed-off-by: Sunita Nadampalli --- prov/shm/src/smr.h | 5 +++++ prov/shm/src/smr_ep.c | 6 +++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/prov/shm/src/smr.h b/prov/shm/src/smr.h index 36e346f30d4..55e8f12cad2 100644 --- a/prov/shm/src/smr.h +++ b/prov/shm/src/smr.h @@ -72,6 +72,11 @@ #ifndef _SMR_H_ #define _SMR_H_ +/* + * Set alignment to aarch64 and x86 cache line size. + */ +#define SHM_SMR_BUFPOOL_ALIGNMENT (64) + struct smr_env { size_t sar_threshold; int disable_cma; diff --git a/prov/shm/src/smr_ep.c b/prov/shm/src/smr_ep.c index 48cee2f7c52..ccfb2246e03 100644 --- a/prov/shm/src/smr_ep.c +++ b/prov/shm/src/smr_ep.c @@ -1265,7 +1265,7 @@ static int smr_create_pools(struct smr_ep *ep, struct fi_info *info) int ret; ret = ofi_bufpool_create(&ep->cmd_ctx_pool, sizeof(struct smr_cmd_ctx), - 16, 0, info->rx_attr->size, + SHM_SMR_BUFPOOL_ALIGNMENT, 0, info->rx_attr->size, OFI_BUFPOOL_NO_TRACK); if (ret) goto err; @@ -1276,13 +1276,13 @@ static int smr_create_pools(struct smr_ep *ep, struct fi_info *info) ret = ofi_bufpool_create(&ep->unexp_buf_pool, sizeof(struct smr_unexp_buf), - 16, 0, 4, OFI_BUFPOOL_NO_TRACK); + SHM_SMR_BUFPOOL_ALIGNMENT, 0, 4, OFI_BUFPOOL_NO_TRACK); if (ret) goto free2; ret = ofi_bufpool_create(&ep->pend_buf_pool, sizeof(struct smr_pend_entry), - 16, 0, 4, OFI_BUFPOOL_NO_TRACK); + SHM_SMR_BUFPOOL_ALIGNMENT, 0, 4, OFI_BUFPOOL_NO_TRACK); if (ret) goto free1;