Skip to content

Commit e95f32e

Browse files
committed
[v2.3.x] prov/efa: Log warnings only for internal OPE failures or if CQ error entry not written
(cherry picked from commit e86ff4f)
1 parent d27271e commit e95f32e

File tree

1 file changed

+36
-2
lines changed

1 file changed

+36
-2
lines changed

prov/efa/src/rdm/efa_rdm_ope.c

Lines changed: 36 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -648,7 +648,7 @@ void efa_rdm_rxe_handle_error(struct efa_rdm_ope *rxe, int err, int prov_errno)
648648
err_entry.err_data = err_msg;
649649
}
650650

651-
EFA_WARN(FI_LOG_CQ, "err: %d, message: %s (%d)\n",
651+
EFA_INFO(FI_LOG_CQ, "err: %d, message: %s (%d)\n",
652652
err_entry.err,
653653
err_entry.err_data
654654
? (const char *) err_entry.err_data
@@ -663,6 +663,14 @@ void efa_rdm_rxe_handle_error(struct efa_rdm_ope *rxe, int err, int prov_errno)
663663
//efa_rdm_rxe_release(rxe);
664664

665665
if (rxe->internal_flags & EFA_RDM_OPE_INTERNAL) {
666+
EFA_WARN(FI_LOG_CQ, "EFA provider internal rxe failure err: %d, message: %s (%d)\n",
667+
err_entry.err,
668+
err_entry.err_data
669+
? (const char *) err_entry.err_data
670+
: efa_strerror(err_entry.prov_errno),
671+
err_entry.prov_errno);
672+
efa_show_help(err_entry.prov_errno);
673+
666674
EFA_WARN(FI_LOG_CQ,
667675
"Writing eq error for rxe from internal operations\n");
668676
efa_base_ep_write_eq_error(&ep->base_ep, err, prov_errno);
@@ -672,6 +680,14 @@ void efa_rdm_rxe_handle_error(struct efa_rdm_ope *rxe, int err, int prov_errno)
672680
efa_cntr_report_error(&ep->base_ep.util_ep, err_entry.flags);
673681
write_cq_err = ofi_cq_write_error(util_cq, &err_entry);
674682
if (write_cq_err) {
683+
EFA_WARN(FI_LOG_CQ, "Failed to write CQ error entry for rxe err: %d, message: %s (%d)\n",
684+
err_entry.err,
685+
err_entry.err_data
686+
? (const char *) err_entry.err_data
687+
: efa_strerror(err_entry.prov_errno),
688+
err_entry.prov_errno);
689+
efa_show_help(err_entry.prov_errno);
690+
675691
EFA_WARN(FI_LOG_CQ,
676692
"Error writing error cq entry when handling RX error\n");
677693
efa_base_ep_write_eq_error(&ep->base_ep, err, prov_errno);
@@ -757,7 +773,7 @@ void efa_rdm_txe_handle_error(struct efa_rdm_ope *txe, int err, int prov_errno)
757773
err_entry.err_data = err_msg;
758774
}
759775

760-
EFA_WARN(FI_LOG_CQ, "err: %d, message: %s (%d)\n",
776+
EFA_INFO(FI_LOG_CQ, "err: %d, message: %s (%d)\n",
761777
err_entry.err,
762778
err_entry.err_data
763779
? (const char *) err_entry.err_data
@@ -774,6 +790,15 @@ void efa_rdm_txe_handle_error(struct efa_rdm_ope *txe, int err, int prov_errno)
774790
//efa_rdm_txe_release(txe);
775791

776792
if (txe->internal_flags & EFA_RDM_OPE_INTERNAL) {
793+
EFA_WARN(FI_LOG_CQ, "EFA provider internal txe failure err: %d, message: %s (%d)\n",
794+
err_entry.err,
795+
err_entry.err_data
796+
? (const char *) err_entry.err_data
797+
: efa_strerror(err_entry.prov_errno),
798+
err_entry.prov_errno);
799+
800+
efa_show_help(err_entry.prov_errno);
801+
777802
EFA_WARN(FI_LOG_CQ,
778803
"Writing eq error for txe from internal operations\n");
779804
efa_base_ep_write_eq_error(&ep->base_ep, err, prov_errno);
@@ -783,6 +808,15 @@ void efa_rdm_txe_handle_error(struct efa_rdm_ope *txe, int err, int prov_errno)
783808
efa_cntr_report_error(&ep->base_ep.util_ep, txe->cq_entry.flags);
784809
write_cq_err = ofi_cq_write_error(util_cq, &err_entry);
785810
if (write_cq_err) {
811+
EFA_WARN(FI_LOG_CQ, "Failed to write CQ error entry for txe err: %d, message: %s (%d)\n",
812+
err_entry.err,
813+
err_entry.err_data
814+
? (const char *) err_entry.err_data
815+
: efa_strerror(err_entry.prov_errno),
816+
err_entry.prov_errno);
817+
818+
efa_show_help(err_entry.prov_errno);
819+
786820
EFA_WARN(FI_LOG_CQ,
787821
"Error writing error cq entry when handling TX error\n");
788822
efa_base_ep_write_eq_error(&ep->base_ep, err, prov_errno);

0 commit comments

Comments
 (0)