diff --git a/CMakeLists.txt b/CMakeLists.txt index cb40a90e4b..3a58db0de7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,18 +13,24 @@ include(${CMAKE_SOURCE_DIR}/cmake/maintainer_mode.cmake) project(NTIRPC C) # version numbers -set(NTIRPC_MAJOR_VERSION 1) -set(NTIRPC_MINOR_VERSION 7) -set(NTIRPC_PATCH_LEVEL 1) +set(NTIRPC_MAJOR_VERSION 3) +# This is .0 for a release, .N for a stable branch, blank for development +set(NTIRPC_MINOR_VERSION .0) +# -something for dev releases +set(NTIRPC_VERSION_EXTRA ) set(VERSION_COMMENT "Full-duplex and bi-directional ONC RPC on TCP." ) # version string used for packaging set(NTIRPC_VERSION - "${NTIRPC_MAJOR_VERSION}.${NTIRPC_MINOR_VERSION}.${NTIRPC_PATCH_LEVEL}") + "${NTIRPC_MAJOR_VERSION}${NTIRPC_MINOR_VERSION}${NTIRPC_VERSION_EXTRA}") +set(NTIRPC_VERSION_BASE + "${NTIRPC_MAJOR_VERSION}${NTIRPC_MINOR_VERSION}") # Up scope for embedding in ganesha set(NTIRPC_VERSION_EMBED "${NTIRPC_VERSION}" PARENT_SCOPE) -set(NTIRPC_ABI_EMBED "${NTIRPC_MAJOR_VERSION}.${NTIRPC_MINOR_VERSION}" PARENT_SCOPE) +set(NTIRPC_VERSION_BASE_EMBED "${NTIRPC_VERSION_BASE}" PARENT_SCOPE) +set(NTIRPC_VERSION_EXTRA_EMBED "${NTIRPC_VERSION_EXTRA}" PARENT_SCOPE) +set(NTIRPC_ABI_EMBED "${NTIRPC_MAJOR_VERSION}${NTIRPC_MINOR_VERSION}" PARENT_SCOPE) set( PACKNAME "${NTIRPC_VERSION}" ) @@ -180,6 +186,8 @@ include_directories(BEFORE "${EXTRA_INCLUDE_DIR}" ) +find_library(LIBURCU urcu-bp) + # Find misc system libs find_library(LIBRT rt) # extended Pthreads functions @@ -188,6 +196,7 @@ set(SYSTEM_LIBRARIES ${KRB5_LIBRARIES} ${SYSTEM_LIBRARIES} ${LIBRT} + ${LIBURCU} ) if (NOT FREEBSD) diff --git a/cmake/modules/FindLTTng.cmake b/cmake/modules/FindLTTng.cmake index bddecad4d2..6e850f8eec 100644 --- a/cmake/modules/FindLTTng.cmake +++ b/cmake/modules/FindLTTng.cmake @@ -40,10 +40,9 @@ find_path(LTTNG_LIBRARY_DIR DOC "The LTTng libraries") find_library(LTTNG_UST_LIBRARY lttng-ust PATHS ${LTTNG_LIBRARY_DIR}) -find_library(URCU_LIBRARY urcu-bp PATHS 
${LTTNG_LIBRARY_DIR}) find_library(UUID_LIBRARY uuid) -set(LTTNG_LIBRARIES ${LTTNG_UST_LIBRARY} ${URCU_LIBRARY} ${UUID_LIBRARY}) +set(LTTNG_LIBRARIES ${LTTNG_UST_LIBRARY} ${UUID_LIBRARY}) find_path(LTTNG_CTL_INCLUDE_DIR NAMES lttng/lttng.h diff --git a/config-h.in.cmake b/config-h.in.cmake index e259d6e9ae..8eb2ddf7bd 100644 --- a/config-h.in.cmake +++ b/config-h.in.cmake @@ -25,9 +25,9 @@ #define PACKAGE "libntirpc" #define PACKAGE_BUGREPORT "" #define PACKAGE_NAME "libntirpc" -#define PACKAGE_STRING "libntirpc ${NTIRPC_VERSION_MAJOR}.${NTIRPC_VERSION_MINOR}.${NTIRPC_PATCH_LEVEL}" +#define PACKAGE_STRING "libntirpc ${NTIRPC_MAJOR_VERSION}${NTIRPC_MINOR_VERSION}" #define PACKAGE_TARNAME "libntirpc" #define PACKAGE_URL "" -#define PACKAGE_VERSION "${NTIRPC_VERSION_MAJOR}.${NTIRPC_VERSION_MINOR}.${NTIRPC_PATCH_LEVEL}" +#define PACKAGE_VERSION "${NTIRPC_MAJOR_VERSION}${NTIRPC_MINOR_VERSION}" /* names as set() in the top-level CMakeLists.txt; the minor part carries its own leading dot */ #endif /* CONFIG_H */ diff --git a/ntirpc/lttng/xprt.h b/ntirpc/lttng/xprt.h index b5b62c0662..d418a4c319 100644 --- a/ntirpc/lttng/xprt.h +++ b/ntirpc/lttng/xprt.h @@ -1,7 +1,7 @@ /* * vim:noexpandtab:shiftwidth=8:tabstop=8: * - * Copyright 2018 Red Hat, Inc. and/or its affiliates. + * Copyright 2018-2019 Red Hat, Inc. and/or its affiliates. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -79,6 +79,314 @@ TRACEPOINT_LOGLEVEL( unref, TRACE_INFO) +TRACEPOINT_EVENT( + xprt, + destroy, + TP_ARGS(const char *, function, + unsigned int, line, + void *, xprt, + uint16_t, flags), + TP_FIELDS( + ctf_string(fnc, function) + ctf_integer(unsigned int, line, line) + ctf_integer_hex(void *, xprt, xprt) + ctf_integer_hex(uint16_t, flags, flags) + ) +) + +TRACEPOINT_LOGLEVEL( + xprt, + destroy, + TRACE_INFO) + +TRACEPOINT_EVENT( + xprt, + unhook, + TP_ARGS(const char *, function, + unsigned int, line, + void *, xprt, + uint32_t, flags), + TP_FIELDS( + ctf_string(fnc, function) + ctf_integer(unsigned int, line, line) + ctf_integer_hex(void *, xprt, xprt) + ctf_integer_hex(uint32_t, flags, flags) + ) +) + +TRACEPOINT_LOGLEVEL( + xprt, + unhook, + TRACE_INFO) + +TRACEPOINT_EVENT( + xprt, + rearm, + TP_ARGS(const char *, function, + unsigned int, line, + void *, xprt, + uint32_t, flags), + TP_FIELDS( + ctf_string(fnc, function) + ctf_integer(unsigned int, line, line) + ctf_integer_hex(void *, xprt, xprt) + ctf_integer_hex(uint32_t, flags, flags) + ) +) + +TRACEPOINT_LOGLEVEL( + xprt, + rearm, + TRACE_INFO) + +TRACEPOINT_EVENT( + xprt, + hook, + TP_ARGS(const char *, function, + unsigned int, line, + void *, xprt, + uint32_t, flags), + TP_FIELDS( + ctf_string(fnc, function) + ctf_integer(unsigned int, line, line) + ctf_integer_hex(void *, xprt, xprt) + ctf_integer_hex(uint32_t, flags, flags) + ) +) + +TRACEPOINT_LOGLEVEL( + xprt, + hook, + TRACE_INFO) + +TRACEPOINT_EVENT( + xprt, + event, + TP_ARGS(const char *, function, + unsigned int, line, + void *, xprt, + uint32_t, xp_flags, + uint32_t, ev_flag), + TP_FIELDS( + ctf_string(fnc, function) + ctf_integer(unsigned int, line, line) + ctf_integer_hex(void *, xprt, xprt) + ctf_integer_hex(uint32_t, xp_flags, xp_flags) + ctf_integer_hex(uint32_t, ev_flag, ev_flag) + ) +) + +TRACEPOINT_LOGLEVEL( + xprt, + event, + TRACE_INFO) + +TRACEPOINT_EVENT( + 
xprt, + recv, + TP_ARGS(const char *, function, + unsigned int, line, + void *, xprt, + unsigned int, destroyed, + unsigned int, count), + TP_FIELDS( + ctf_string(fnc, function) + ctf_integer(unsigned int, line, line) + ctf_integer_hex(void *, xprt, xprt) + ctf_integer(unsigned int, destroyed, destroyed) + ctf_integer(unsigned int, count, count) + ) +) + +TRACEPOINT_LOGLEVEL( + xprt, + recv, + TRACE_INFO) + +TRACEPOINT_EVENT( + xprt, + send, + TP_ARGS(const char *, function, + unsigned int, line, + void *, xprt, + unsigned int, destroyed, + unsigned int, count), + TP_FIELDS( + ctf_string(fnc, function) + ctf_integer(unsigned int, line, line) + ctf_integer_hex(void *, xprt, xprt) + ctf_integer(unsigned int, destroyed, destroyed) + ctf_integer(unsigned int, count, count) + ) +) + +TRACEPOINT_LOGLEVEL( + xprt, + send, + TRACE_INFO) + +TRACEPOINT_EVENT( + xprt, + write_blocked, + TP_ARGS(const char *, function, + unsigned int, line, + void *, xprt), + TP_FIELDS( + ctf_string(fnc, function) + ctf_integer(unsigned int, line, line) + ctf_integer_hex(void *, xprt, xprt) + ) +) + +TRACEPOINT_LOGLEVEL( + xprt, + write_blocked, + TRACE_INFO) + +TRACEPOINT_EVENT( + xprt, + write_complete, + TP_ARGS(const char *, function, + unsigned int, line, + void *, xprt, + unsigned int, has_blocked), + TP_FIELDS( + ctf_string(fnc, function) + ctf_integer(unsigned int, line, line) + ctf_integer_hex(void *, xprt, xprt) + ctf_integer(unsigned int, has_blocked, has_blocked) + ) +) + +TRACEPOINT_LOGLEVEL( + xprt, + write_complete, + TRACE_INFO) + +TRACEPOINT_EVENT( + xprt, + sendmsg, + TP_ARGS(const char *, function, + unsigned int, line, + void *, xprt, + unsigned int, remaining, + unsigned int, frag_needed, + unsigned int, iov_count), + TP_FIELDS( + ctf_string(fnc, function) + ctf_integer(unsigned int, line, line) + ctf_integer_hex(void *, xprt, xprt) + ctf_integer(unsigned int, remaining, remaining) + ctf_integer(unsigned int, frag_needed, frag_needed) + ctf_integer(unsigned int, iov_count, 
iov_count) + ) +) + +TRACEPOINT_LOGLEVEL( + xprt, + sendmsg, + TRACE_INFO) + +TRACEPOINT_EVENT( + xprt, + mutex, + TP_ARGS(const char *, function, + unsigned int, line, + void *, xprt), + TP_FIELDS( + ctf_string(fnc, function) + ctf_integer(unsigned int, line, line) + ctf_integer_hex(void *, xprt, xprt) + ) +) + +TRACEPOINT_LOGLEVEL( + xprt, + mutex, + TRACE_INFO) + +TRACEPOINT_EVENT( + xprt, + funcin, + TP_ARGS(const char *, function, + unsigned int, line, + void *, xprt), + TP_FIELDS( + ctf_string(fnc, function) + ctf_integer(unsigned int, line, line) + ctf_integer_hex(void *, xprt, xprt) + ) +) + +TRACEPOINT_LOGLEVEL( + xprt, + funcin, + TRACE_INFO) + +TRACEPOINT_EVENT( + xprt, + recv_frag, + TP_ARGS(const char *, function, + unsigned int, line, + void *, xprt, + int32_t, frag_len), + TP_FIELDS( + ctf_string(fnc, function) + ctf_integer(unsigned int, line, line) + ctf_integer_hex(void *, xprt, xprt) + ctf_integer_hex(int32_t, frag_len, frag_len) + ) +) + +TRACEPOINT_LOGLEVEL( + xprt, + recv_frag, + TRACE_INFO) + +TRACEPOINT_EVENT( + xprt, + recv_bytes, + TP_ARGS(const char *, function, + unsigned int, line, + void *, xprt, + int32_t, frag_remain, + ssize_t, bytes), + TP_FIELDS( + ctf_string(fnc, function) + ctf_integer(unsigned int, line, line) + ctf_integer_hex(void *, xprt, xprt) + ctf_integer_hex(int32_t, frag_remain, frag_remain) + ctf_integer_hex(ssize_t, bytes, bytes) + ) +) + +TRACEPOINT_LOGLEVEL( + xprt, + recv_bytes, + TRACE_INFO) + +TRACEPOINT_EVENT( + xprt, + recv_exit, + TP_ARGS(const char *, function, + unsigned int, line, + void *, xprt, + const char *, reason, + int, code), + TP_FIELDS( + ctf_string(fnc, function) + ctf_integer(unsigned int, line, line) + ctf_integer_hex(void *, xprt, xprt) + ctf_string(reason, reason) + ctf_integer(int, code, code) + ) +) + +TRACEPOINT_LOGLEVEL( + xprt, + recv_exit, + TRACE_INFO) + #endif /* GANESHA_LTTNG_XPRT_TP_H */ #undef TRACEPOINT_INCLUDE diff --git a/ntirpc/misc/timespec.h b/ntirpc/misc/timespec.h index 
574a41a68f..5b04a45941 100644 --- a/ntirpc/misc/timespec.h +++ b/ntirpc/misc/timespec.h @@ -40,19 +40,24 @@ /* Operations on timespecs */ #define timespecclear(tvp) ((tvp)->tv_sec = (tvp)->tv_nsec = 0) #define timespecisset(tvp) ((tvp)->tv_sec || (tvp)->tv_nsec) -#define timespeccmp(tvp, uvp, cmp) \ - (((tvp)->tv_sec == (uvp)->tv_sec) ? \ - ((tvp)->tv_nsec cmp(uvp)->tv_nsec) : \ - ((tvp)->tv_sec cmp(uvp)->tv_sec)) -#define timespecadd(vvp, uvp) \ - do { \ - (vvp)->tv_sec += (uvp)->tv_sec; \ - (vvp)->tv_nsec += (uvp)->tv_nsec; \ - if ((vvp)->tv_nsec >= 1000000000) { \ - (vvp)->tv_sec++; \ - (vvp)->tv_nsec -= 1000000000; \ - } \ +#ifndef timespeccmp +#define timespeccmp(tvp, uvp, cmp) \ + (((tvp)->tv_sec == (uvp)->tv_sec) ? \ + ((tvp)->tv_nsec cmp (uvp)->tv_nsec) : \ + ((tvp)->tv_sec cmp (uvp)->tv_sec)) +#endif /* timespeccmp */ +#ifndef timespecadd +#define timespecadd(tsp, usp, vsp) \ + do { \ + (vsp)->tv_sec = (tsp)->tv_sec + (usp)->tv_sec; \ + (vsp)->tv_nsec = (tsp)->tv_nsec + (usp)->tv_nsec; \ + if ((vsp)->tv_nsec >= 1000000000L) { \ + (vsp)->tv_sec++; \ + (vsp)->tv_nsec -= 1000000000L; \ + } \ } while (0) +#endif /* timespecadd */ + #define timespec_adds(vvp, s) \ do { \ (vvp)->tv_sec += s; \ @@ -67,13 +72,17 @@ (vvp)->tv_nsec -= 1000000000; \ } \ } while (0) -#define timespecsub(vvp, uvp) \ - do { \ - (vvp)->tv_sec -= (uvp)->tv_sec; \ - (vvp)->tv_nsec -= (uvp)->tv_nsec; \ - if ((vvp)->tv_nsec < 0) { \ - (vvp)->tv_sec--; \ - (vvp)->tv_nsec += 1000000000; \ - } \ + +#ifndef timespecsub +#define timespecsub(tsp, usp, vsp) \ + do { \ + (vsp)->tv_sec = (tsp)->tv_sec - (usp)->tv_sec; \ + (vsp)->tv_nsec = (tsp)->tv_nsec - (usp)->tv_nsec; \ + if ((vsp)->tv_nsec < 0) { \ + (vsp)->tv_sec--; \ + (vsp)->tv_nsec += 1000000000L; \ + } \ } while (0) +#endif /* timespecsub */ + #endif /* TIMESPEC_H */ diff --git a/ntirpc/rpc/auth_gss.h b/ntirpc/rpc/auth_gss.h index 354f28b9f7..84bffaae3f 100644 --- a/ntirpc/rpc/auth_gss.h +++ b/ntirpc/rpc/auth_gss.h @@ -105,6 +105,8 @@ struct 
rpc_gss_init_res { /* Maximum sequence number value. */ #define RPCSEC_GSS_MAXSEQ 0x80000000 +typedef void (*checksum_func_t) (void *priv, void *databuf, size_t length); + /* Prototypes. */ __BEGIN_DECLS bool xdr_rpc_gss_cred(XDR *xdrs, struct rpc_gss_cred *p); @@ -115,7 +117,7 @@ bool xdr_rpc_gss_wrap(XDR *xdrs, xdrproc_t xdr_func, void *xdr_ptr, u_int seq); bool xdr_rpc_gss_unwrap(XDR *xdrs, xdrproc_t xdr_func, void *xdr_ptr, gss_ctx_id_t ctx, gss_qop_t qop, rpc_gss_svc_t svc, - u_int seq); + u_int seq, checksum_func_t checksum_func, void *priv); bool xdr_rpc_gss_encode(XDR *xdrs, gss_buffer_t buf, u_int maxsize); bool xdr_rpc_gss_decode(XDR *xdrs, gss_buffer_t buf); diff --git a/ntirpc/rpc/clnt.h b/ntirpc/rpc/clnt.h index 49eda22638..d442cbfd4a 100644 --- a/ntirpc/rpc/clnt.h +++ b/ntirpc/rpc/clnt.h @@ -182,6 +182,7 @@ struct rpc_timers { #define CLNT_FLAG_DESTROYING SVC_XPRT_FLAG_DESTROYING #define CLNT_FLAG_RELEASING SVC_XPRT_FLAG_RELEASING #define CLNT_FLAG_DESTROYED SVC_XPRT_FLAG_DESTROYED +#define CLNT_FLAG_LOCAL 0x8000 /* Client is unshared/local */ /* * CLNT_REF flags diff --git a/ntirpc/rpc/gss_internal.h b/ntirpc/rpc/gss_internal.h index d17b6588dd..d20fbd9edc 100644 --- a/ntirpc/rpc/gss_internal.h +++ b/ntirpc/rpc/gss_internal.h @@ -119,6 +119,8 @@ unref_svc_rpc_gss_data(struct svc_rpc_gss_data *gd) /* if refcnt is 0, gd is not reachable */ if (unlikely(atomic_dec_uint32_t(&gd->refcnt) == 0)) { svcauth_gss_destroy(gd->auth); + /* gd was unlocked and freed. 
*/ + return; } mutex_unlock(&gd->lock); diff --git a/ntirpc/rpc/svc.h b/ntirpc/rpc/svc.h index 27fa29416a..d3eded4315 100644 --- a/ntirpc/rpc/svc.h +++ b/ntirpc/rpc/svc.h @@ -60,6 +60,7 @@ typedef struct svc_xprt SVCXPRT; enum xprt_stat { XPRT_IDLE = 0, XPRT_MOREREQS, + XPRT_SUSPEND, /* always last in this order for comparisons */ XPRT_DIED, XPRT_DESTROYED @@ -118,11 +119,13 @@ enum xprt_stat { #define RPC_SVC_FDSET_SET 5 typedef enum xprt_stat (*svc_xprt_fun_t) (SVCXPRT *); -typedef enum xprt_stat (*svc_xprt_xdr_fun_t) (SVCXPRT *, XDR *); +typedef struct svc_req *(*svc_xprt_alloc_fun_t) (SVCXPRT *, XDR *); +typedef void (*svc_xprt_free_fun_t) (struct svc_req *, enum xprt_stat); typedef struct svc_init_params { svc_xprt_fun_t disconnect_cb; - svc_xprt_xdr_fun_t request_cb; + svc_xprt_alloc_fun_t alloc_cb; + svc_xprt_free_fun_t free_cb; u_long flags; u_int max_connections; /* xprts */ @@ -148,7 +151,8 @@ typedef struct svc_init_params { #define SVC_XPRT_FLAG_NONE 0x0000 /* uint16_t actually used */ -#define SVC_XPRT_FLAG_ADDED 0x0001 +#define SVC_XPRT_FLAG_ADDED_RECV 0x0001 +#define SVC_XPRT_FLAG_ADDED_SEND 0x0002 #define SVC_XPRT_FLAG_INITIAL 0x0004 #define SVC_XPRT_FLAG_INITIALIZED 0x0008 @@ -235,6 +239,8 @@ struct svc_xprt { svc_req_fun_t process_cb; svc_xprt_fun_t rendezvous_cb; } xp_dispatch; + /* Handle resumed requests */ + svc_req_fun_t xp_resume_cb; SVCXPRT *xp_parent; char *xp_tp; /* transport provider device name */ @@ -260,6 +266,7 @@ struct svc_xprt { mutex_t xp_lock; int xp_fd; + int xp_fd_send; /* Sometimes a dup of xp_fd needed for send */ int xp_ifindex; /* interface index */ int xp_si_type; /* si type */ int xp_type; /* xprt type */ @@ -328,6 +335,8 @@ struct svc_req { #define svc_getrpccaller(x) (&(x)->xp_remote.ss) #define svc_getrpclocal(x) (&(x)->xp_local.ss) +extern void svc_resume(struct svc_req *req); + /* * Ganesha. Get connected transport type. 
*/ @@ -449,6 +458,10 @@ static inline void svc_destroy_it(SVCXPRT *xprt, XPRT_TRACE(xprt, __func__, tag, line); +#ifdef USE_LTTNG_NTIRPC + tracepoint(xprt, destroy, tag, line, xprt, flags); +#endif /* USE_LTTNG_NTIRPC */ + if (flags & SVC_XPRT_FLAG_DESTROYING) { /* previously set, do nothing */ return; diff --git a/ntirpc/rpc/xdr.h b/ntirpc/rpc/xdr.h index 240d750731..f773751d20 100644 --- a/ntirpc/rpc/xdr.h +++ b/ntirpc/rpc/xdr.h @@ -117,12 +117,23 @@ enum xdr_op { * BYTES_PER_XDR_UNIT) #endif +/* XDR vector buffer types */ +typedef enum vio_type { + VIO_HEADER, /* header buffer before data */ + VIO_DATA, /* data buffer */ + VIO_TRAILER_LEN, /* length field for following TRAILER buffer */ + VIO_TRAILER, /* trailer buffer after data */ +} vio_type; + /* XDR buffer vector descriptors */ typedef struct xdr_vio { uint8_t *vio_base; uint8_t *vio_head; /* minimum vio_tail (header offset) */ - uint8_t *vio_tail; + uint8_t *vio_tail; /* end of the used part of the buffer */ uint8_t *vio_wrap; /* maximum vio_tail */ + uint32_t vio_length; /* length of buffer, used for vector + pre-allocation */ + vio_type vio_type; /* type of buffer */ } xdr_vio; /* vio_wrap >= vio_tail >= vio_head >= vio_base */ @@ -133,6 +144,7 @@ typedef struct xdr_vio { #define UIO_FLAG_GIFT 0x0004 #define UIO_FLAG_MORE 0x0008 #define UIO_FLAG_REALLOC 0x0010 +#define UIO_FLAG_REFER 0x0020 struct xdr_uio; typedef void (*xdr_uio_release)(struct xdr_uio *, u_int); @@ -188,6 +200,14 @@ typedef struct rpc_xdr { /* new vector and refcounted interfaces */ bool (*x_getbufs)(struct rpc_xdr *, xdr_uio *, u_int); bool (*x_putbufs)(struct rpc_xdr *, xdr_uio *, u_int); + /* Force a new buffer to start (or fail) */ + bool (*x_newbuf)(struct rpc_xdr *); + /* Return the count of buffers in the vector from pos */ + int (*x_iovcount)(struct rpc_xdr *, u_int, u_int); + /* Fill xdr_vio with buffers from pos */ + bool (*x_fillbufs)(struct rpc_xdr *, u_int , xdr_vio *, u_int); + /* Allocate bufs for headers and trailers and 
insert into vio */ + bool (*x_allochdrs)(struct rpc_xdr *, u_int , xdr_vio *, int); } *x_ops; void *x_public; /* users' data */ void *x_private; /* pointer to private data */ @@ -331,6 +351,23 @@ xdr_putlong(XDR *xdrs, const long *lp) (*(xdrs)->x_ops->x_control)(xdrs, req, op) #define xdr_control(xdrs, req, op) XDR_CONTROL(xdrs, req, op) +#define XDR_NEWBUF(xdrs) \ + (*(xdrs)->x_ops->x_newbuf)(xdrs) +#define xdr_newbuf(xdrs, pos) XDR_NEWBUF(xdrs) + +#define XDR_IOVCOUNT(xdrs, pos, len) \ + (*(xdrs)->x_ops->x_iovcount)(xdrs, pos, len) +#define xdr_iovcount(xdrs, pos, len) XDR_IOVCOUNT(xdrs, pos, len) + +#define XDR_FILLBUFS(xdrs, pos, iov, len) \ + (*(xdrs)->x_ops->x_fillbufs)(xdrs, pos, iov, len) +#define xdr_fillbufs(xdrs, pos, iov, len) XDR_FILLBUFS(xdrs, pos, iov, len) + +#define XDR_ALLOCHDRS(xdrs, pos, iov, iov_count) \ + (*(xdrs)->x_ops->x_allochdrs)(xdrs, pos, iov, iov_count) +#define xdr_allochdrs(xdrs, pos, iov, iov_count) \ + XDR_ALLOCHDRS(xdrs, pos, iov, iov_count) + /* * Support struct for discriminated unions. * You create an array of xdrdiscrim structures, terminated with diff --git a/ntirpc/rpc/xdr_inline.h b/ntirpc/rpc/xdr_inline.h index 33b8e76696..b551024344 100644 --- a/ntirpc/rpc/xdr_inline.h +++ b/ntirpc/rpc/xdr_inline.h @@ -482,12 +482,8 @@ xdr_opaque_encode(XDR *xdrs, const char *cp, u_int cnt) * XDR_INLINE is just as likely to do a function call, * so don't bother with it here. 
*/ - if (!XDR_PUTBYTES(xdrs, cp, cnt)) { - __warnx(TIRPC_DEBUG_FLAG_ERROR, - "%s:%u ERROR opaque", - __func__, __LINE__); + if (!XDR_PUTBYTES(xdrs, cp, cnt)) return (false); - } /* * round byte count to full xdr units @@ -498,12 +494,8 @@ xdr_opaque_encode(XDR *xdrs, const char *cp, u_int cnt) uint32_t zero = 0; if (!XDR_PUTBYTES(xdrs, (char *) &zero, - BYTES_PER_XDR_UNIT - rndup)) { - __warnx(TIRPC_DEBUG_FLAG_ERROR, - "%s:%u ERROR zero", - __func__, __LINE__); + BYTES_PER_XDR_UNIT - rndup)) return (false); - } } return (true); @@ -640,12 +632,8 @@ xdr_bytes_encode(XDR *xdrs, char **cpp, u_int *sizep, u_int maxsize) return (false); } - if (!XDR_PUTUINT32(xdrs, size)) { - __warnx(TIRPC_DEBUG_FLAG_ERROR, - "%s:%u ERROR size", - __func__, __LINE__); + if (!XDR_PUTUINT32(xdrs, size)) return (false); - } return (xdr_opaque_encode(xdrs, sp, size)); } @@ -840,12 +828,8 @@ xdr_array_encode(XDR *xdrs, char **cpp, u_int *sizep, u_int maxsize, return (false); } - if (!XDR_PUTUINT32(xdrs, size)) { - __warnx(TIRPC_DEBUG_FLAG_ERROR, - "%s:%u ERROR size", - __func__, __LINE__); + if (!XDR_PUTUINT32(xdrs, size)) return (false); - } for (; (i < size) && stat; i++) { stat = (*xdr_elem) (xdrs, target); @@ -1002,12 +986,8 @@ xdr_string_encode(XDR *xdrs, char **cpp, u_int maxsize) return (false); } - if (!XDR_PUTUINT32(xdrs, size)) { - __warnx(TIRPC_DEBUG_FLAG_ERROR, - "%s:%u ERROR size", - __func__, __LINE__); + if (!XDR_PUTUINT32(xdrs, size)) return (false); - } return (xdr_opaque_encode(xdrs, *cpp, size)); } diff --git a/ntirpc/rpc/xdr_ioq.h b/ntirpc/rpc/xdr_ioq.h index 8effc8d786..9d46e32373 100644 --- a/ntirpc/rpc/xdr_ioq.h +++ b/ntirpc/rpc/xdr_ioq.h @@ -89,6 +89,10 @@ struct xdr_ioq { struct xdr_ioq_uv_head ioq_uv; /* header/vectors */ uint64_t id; + uint32_t write_start; /* Position to start write at */ + int frag_hdr_bytes_sent; /* Indicates a fragment header has been sent */ + bool has_blocked; + struct rpc_dplx_rec *rec; }; #define _IOQ(p) (opr_containerof((p), struct 
xdr_ioq, ioq_s)) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 34b025d19f..4bb9256236 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -107,7 +107,7 @@ target_link_libraries(ntirpc ${SYSTEM_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}) set_target_properties(ntirpc PROPERTIES LINK_FLAGS "-Wl,--version-script=${PROJECT_BINARY_DIR}/libntirpc.map" VERSION ${NTIRPC_VERSION} - SOVERSION "${NTIRPC_MAJOR_VERSION}.${NTIRPC_MINOR_VERSION}" + SOVERSION "${NTIRPC_MAJOR_VERSION}${NTIRPC_MINOR_VERSION}" ) install(TARGETS ntirpc DESTINATION ${LIB_INSTALL_DIR}) diff --git a/src/auth_gss.c b/src/auth_gss.c index 2d90d07eb5..74e372f8b8 100644 --- a/src/auth_gss.c +++ b/src/auth_gss.c @@ -620,7 +620,13 @@ authgss_wrap(AUTH *auth, XDR *xdrs, xdrproc_t xdr_func, void *xdr_ptr) { struct rpc_gss_data *gd = AUTH_PRIVATE(auth); - __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, "%s()", __func__); + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, "%s() %d %s", __func__, + !gd->established ? 0 : gd->sec.svc, + !gd->established ? "not established" + : gd->sec.svc == RPCSEC_GSS_SVC_NONE ? "krb5" + : gd->sec.svc == RPCSEC_GSS_SVC_INTEGRITY ? "krb5i" + : gd->sec.svc == RPCSEC_GSS_SVC_PRIVACY ? 
"krb5p" + : "unknown"); if (!gd->established || gd->sec.svc == RPCSEC_GSS_SVC_NONE) return ((*xdr_func) (xdrs, xdr_ptr)); @@ -642,5 +648,5 @@ authgss_unwrap(AUTH *auth, XDR *xdrs, xdrproc_t xdr_func, void *xdr_ptr) return (xdr_rpc_gss_unwrap (xdrs, xdr_func, xdr_ptr, gd->ctx, gd->sec.qop, gd->sec.svc, - gd->gc.gc_seq)); + gd->gc.gc_seq, NULL, NULL)); } diff --git a/src/authgss_prot.c b/src/authgss_prot.c index eba71b6b1b..b01578f452 100644 --- a/src/authgss_prot.c +++ b/src/authgss_prot.c @@ -45,10 +45,12 @@ #include #include #include +#include /* additional space needed for encoding */ #define RPC_SLACK_SPACE 1024 #define AUTHGSS_MAX_TOKEN_SIZE 24576 /* default MS PAC is 12000 bytes */ +#define MAXALLOCA (256) bool xdr_rpc_gss_encode(XDR *xdrs, gss_buffer_t buf, u_int maxsize) @@ -169,32 +171,440 @@ xdr_rpc_gss_init_res(XDR *xdrs, struct rpc_gss_init_res *p) return (xdr_stat); } +void +gss_log_error(char *m, OM_uint32 maj_stat, OM_uint32 min_stat) +{ /* Log the GSS-API text for a major/minor status pair, tagged with m. */ + OM_uint32 min; + gss_buffer_desc msg1 = GSS_C_EMPTY_BUFFER, msg2 = GSS_C_EMPTY_BUFFER; + OM_uint32 msg_ctx = 0; /* gss_display_status() takes an OM_uint32 pointer */ + + gss_display_status(&min, maj_stat, GSS_C_GSS_CODE, GSS_C_NULL_OID, + &msg_ctx, &msg1); + msg_ctx = 0; /* the mechanism code starts its own message sequence */ + gss_display_status(&min, min_stat, GSS_C_MECH_CODE, GSS_C_NULL_OID, + &msg_ctx, &msg2); + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, "rpcsec_gss: %s: %s - %s\n", + m, msg1.value ? (char *)msg1.value : "", msg2.value ? (char *)msg2.value : ""); /* value is still NULL if the lookup produced nothing */ + gss_release_buffer(&min, &msg1); + gss_release_buffer(&min, &msg2); +} + +void show_gss_xdr_iov(gss_iov_buffer_desc *gss_iov, int gss_count, + xdr_vio *xdr_iov, int xdr_count, const char *desc) +{ + int i; + + /* Now show the gss_iov */ + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "Show the gss_iov %s %p", desc, gss_iov); + + for (i = 0; i < gss_count; i++) { + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "buf %d type %d length %lu value %p", + i, gss_iov[i].type, + (unsigned long)gss_iov[i].buffer.length, gss_iov[i].buffer.value); + } + + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "Show the xdr_iov %s %p", desc, xdr_iov); + + if (xdr_iov == NULL) + return; + + for (i = 0; i < xdr_count; 
i++) { + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "buf %d type %d (base %p head %p tail %p wrap %p) length %lu", + i, xdr_iov[i].vio_type, + xdr_iov[i].vio_base, xdr_iov[i].vio_head, + xdr_iov[i].vio_tail, xdr_iov[i].vio_wrap, + (unsigned long)(xdr_iov[i].vio_tail - + xdr_iov[i].vio_head)); + } +} + bool xdr_rpc_gss_wrap(XDR *xdrs, xdrproc_t xdr_func, void *xdr_ptr, gss_ctx_id_t ctx, gss_qop_t qop, rpc_gss_svc_t svc, u_int seq) { gss_buffer_desc databuf, wrapbuf; OM_uint32 maj_stat, min_stat; - int start, end, conf_state; - bool xdr_stat; + int start, end, conf_state, xv_count, gv_count, data_count, after_data, i; + bool xdr_stat, vector; u_int databuflen, maxwrapsz; + gss_iov_buffer_desc *gss_iov = NULL; + xdr_vio *xdr_iov = NULL, *data; + u_int32_t xvsize = 0, gvsize = 0; - /* Write dummy for databody length. */ + if (svc != RPCSEC_GSS_SVC_PRIVACY && + svc != RPCSEC_GSS_SVC_INTEGRITY) { + /* For some reason we got here with not supported type. */ + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "%s() svc != RPCSEC_GSS_SVC_PRIVACY or RPCSEC_GSS_SVC_INTEGRITY", + __func__); + return (FALSE); + } + + /* Write dummy for databody length. The length will be filled in later. + * - For RPCSEC_GSS_SVC_PRIVACY the length will include the whole + * result of gss_wrap. + * - For RPCSEC_GSS_SVC_INTEGRITY the length will just be the response + * data length. + * No matter what type or how we process, we will come back and fill + * the length in exactly here. + */ start = XDR_GETPOS(xdrs); databuflen = 0xaaaaaaaa; /* should always overwrite */ - if (!XDR_PUTUINT32(xdrs, databuflen)) + if (!XDR_PUTUINT32(xdrs, databuflen)) { + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "%s() could not put databuflen", + __func__); return (FALSE); + } - memset(&databuf, 0, sizeof(databuf)); - memset(&wrapbuf, 0, sizeof(wrapbuf)); - - /* Marshal rpc_gss_data_t (sequence number + arguments). 
*/ - if (!XDR_PUTUINT32(xdrs, seq) || !(*xdr_func) (xdrs, xdr_ptr)) + /* If we are doing PRIVACY, determine if XDR is a vector or not. + * INTEGRITY can work with non-vector xdrs like xdrmem because the + * MIC token will just be appended at the end. + * If it's privacy, and NEWBUF is supported (because xdrs is a vector) + * then NEWBUF will have allocated the new buffer. + */ + vector = (svc == RPCSEC_GSS_SVC_INTEGRITY) || XDR_NEWBUF(xdrs); + + /* Marshal rpc_gss_data_t (sequence number + arguments). + * If it's a vector, the response has been marshalled into a new + * buffer so that we will be able to insert any header. + */ + if (!XDR_PUTUINT32(xdrs, seq) || !(*xdr_func) (xdrs, xdr_ptr)) { + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "%s() could not encode rpc_gss_data_t", + __func__); return (FALSE); + } + end = XDR_GETPOS(xdrs); + databuflen = end - start - 4; + + if (vector) { + /* Now we have the response encoded, time to build out iov for + * gss_get_mic_iov or gss_wrap_iov. + * + * Each vector holds the data buffers plus the extra slots added below. + */ + data_count = XDR_IOVCOUNT(xdrs, start + 4, databuflen); + + if (data_count < 0) { + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "%s() data_count = %d", + __func__, data_count); + return (FALSE); + } + + if (svc == RPCSEC_GSS_SVC_INTEGRITY) { + /* Add a trailer length (which won't be part of the gss_iov) + * and a trailer buffer for the MIC + */ + xv_count = data_count + 2; + gv_count = data_count + 1; + after_data = data_count; + } else if (svc == RPCSEC_GSS_SVC_PRIVACY) { + /* Add header, padding, and trailer for the wrap */ + xv_count = data_count + 3; + gv_count = data_count + 3; + after_data = data_count + 1; + } + + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "data_count=%d, gv_count=%d, xv_count=%d, after_data=%d", + data_count, gv_count, xv_count, after_data); + + /* Determine the size of the gss_iov */ + gvsize = gv_count * sizeof(gss_iov_buffer_desc); + xvsize = xv_count * sizeof(xdr_vio); + + /* Allocate the gss_iov */ + if 
(unlikely(gvsize > MAXALLOCA)) { + gss_iov = mem_alloc(gvsize); + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "mem_alloc gss_iov=%p size %llu count %d", + gss_iov, (unsigned long long) gvsize, + gv_count); + } else { + gss_iov = alloca(gvsize); + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "alloca gss_iov=%p size %llu count %d", + gss_iov, (unsigned long long) gvsize, + gv_count); + } + + /* Allocate the xdr_iov */ + if (unlikely(xvsize > MAXALLOCA)) { + xdr_iov = mem_alloc(xvsize); + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "mem_alloc xdr_iov=%p size %llu count %d", + xdr_iov, (unsigned long long) xvsize, + xv_count); + } else { + xdr_iov = alloca(xvsize); + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "alloca xdr_iov=%p size %llu count %d", + xdr_iov, (unsigned long long) xvsize, + xv_count); + } + + memset(gss_iov, 0, gvsize); + memset(xdr_iov, 0, xvsize); + + /* Point to where the first buffer in the data will be. */ + data = &xdr_iov[(svc == RPCSEC_GSS_SVC_PRIVACY) ? 1 : 0]; + + /* Now fill in the data buffers + * vector is empty on entry + * DATA buffers are completely filled (vio_base, vio_head, + * vio_tail, vio_wrap, vio_length, and vio_type) on exit. + * No other buffers are touched at this point. 
+ */ + xdr_stat = XDR_FILLBUFS(xdrs, start + 4, data, databuflen); + if (!xdr_stat) goto out; /* same handling as the XDR_ALLOCHDRS result; DATA entries are unusable if the fill failed */ + /* Now show the gss_iov and xdr_iov */ + show_gss_xdr_iov(gss_iov, gv_count, xdr_iov, xv_count, + "after XDR_FILLBUFS"); + + /* Now set up the gss_iov */ + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, "Set up gss_iov"); + for (i = 0; i < gv_count; i++) { + if (i == 0 && svc == RPCSEC_GSS_SVC_PRIVACY) { + /* Fill in HEADER buffer */ + gss_iov[i].type = GSS_IOV_BUFFER_TYPE_HEADER; + } else if (i < after_data) { + /* Copy over a DATA buffer */ + gss_iov[i].type = GSS_IOV_BUFFER_TYPE_DATA; + gss_iov[i].buffer.length = + xdr_iov[i].vio_length; + gss_iov[i].buffer.value = + xdr_iov[i].vio_head; + } else if (svc == RPCSEC_GSS_SVC_INTEGRITY) { + /* Set up TRAILER buffer for INTEGRITY*/ + gss_iov[i].type = GSS_IOV_BUFFER_TYPE_MIC_TOKEN; + } else if (i == after_data) { + /* Set up PADDING buffer for PRIVACY*/ + gss_iov[i].type = GSS_IOV_BUFFER_TYPE_PADDING; + } else { + /* Set up TRAILER buffer for PRIVACY*/ + gss_iov[i].type = GSS_IOV_BUFFER_TYPE_TRAILER; + } + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "buf %d type %d length %lu value %p", + i, gss_iov[i].type, + (unsigned long)gss_iov[i].buffer.length, gss_iov[i].buffer.value); + } + + /* Now show the gss_iov and xdr_iov */ + show_gss_xdr_iov(gss_iov, gv_count, xdr_iov, xv_count, + "after setting up gss_iov"); + + /* At this point gss_iov HEADER, PADDING, and TRAILER have + * type set and buffer is empty. + * DATA is completely filled in. + * xdr_iov DATA buffers are completely filled in. + * xdr_iov HEADER and TRAILER buffers are empty. 
+ */ + + if (svc == RPCSEC_GSS_SVC_INTEGRITY) { + /* Now call gss_get_mic_iov_length */ + maj_stat = gss_get_mic_iov_length(&min_stat, ctx, qop, + gss_iov, gv_count); + + if (maj_stat != GSS_S_COMPLETE) { + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "%s() gss_get_mic_iov_length failed", + __func__); + gss_log_error("gss_get_mic_iov_length", + maj_stat, min_stat); + xdr_stat = FALSE; + goto out; + } + + /* Set up the VIO_TRAILER_LEN buffer in the xdr_iov */ + xdr_iov[after_data].vio_length = BYTES_PER_XDR_UNIT; + xdr_iov[after_data].vio_type = VIO_TRAILER_LEN; + + /* Copy the TRAILER buffer length into the xdr_iov */ + xdr_iov[after_data + 1].vio_length = + gss_iov[after_data].buffer.length; + xdr_iov[after_data + 1].vio_type = VIO_TRAILER; + + /* Marshal databody_integ length. Note that this will + * leave the cursor position at start + 4 but the + * forthcoming XDR_ALLOCHDRS is going to fix the + * cursor position to the end of everything. On failure leave via out, like the other error paths, rather than returning with gss_iov/xdr_iov possibly mem_alloc'd. + */ + if (!(xdr_stat = XDR_SETPOS(xdrs, start))) { + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "%s() XDR_SETPOS #2 failed", + __func__); + goto out; + } + + if (!(xdr_stat = XDR_PUTUINT32(xdrs, databuflen))) + goto out; + } else { + u_int databody_priv_len; + + /* Now call gss_wrap_iov_length */ + maj_stat = gss_wrap_iov_length(&min_stat, ctx, true, + qop, NULL, /* conf_state output is not needed */ + gss_iov, gv_count); + + if (maj_stat != GSS_S_COMPLETE) { + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "%s() gss_wrap_iov_length failed", + __func__); + gss_log_error("gss_wrap_iov_length", + maj_stat, min_stat); + xdr_stat = FALSE; + goto out; + } + + /* Copy the HEADER buffer length into the xdr_iov */ + xdr_iov[0].vio_length = gss_iov[0].buffer.length; + xdr_iov[0].vio_type = VIO_HEADER; + + /* Copy the PADDING buffer length into the xdr_iov */ + xdr_iov[after_data].vio_length = + gss_iov[after_data].buffer.length; + xdr_iov[after_data].vio_type = VIO_TRAILER; + + /* Copy the TRAILER buffer length into the xdr_iov */ + xdr_iov[after_data + 1].vio_length = + gss_iov[after_data + 
1].buffer.length; + xdr_iov[after_data + 1].vio_type = VIO_TRAILER; + + /* Compute the databody_priv length as sum of + * the databuflen and the HEADER, PADDING, and + * TRAILER buffers. + */ + databody_priv_len = databuflen + + gss_iov[0].buffer.length + + gss_iov[after_data].buffer.length + + gss_iov[after_data + 1].buffer.length; + + /* Marshal databody_priv length. Note that this will + * leave the cursor position at start + 4 but the + * forthcoming XDR_ALLOCHDRS is going to fix the + * cursor position to the end of everything. On failure leave via out, like the other error paths, rather than returning with gss_iov/xdr_iov possibly mem_alloc'd. + */ + if (!(xdr_stat = XDR_SETPOS(xdrs, start))) { + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "%s() XDR_SETPOS #2 failed", + __func__); + goto out; + } + + if (!(xdr_stat = XDR_PUTUINT32(xdrs, databody_priv_len))) { + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "%s() XDR_PUTUINT32 databody_priv_len failed", + __func__); + goto out; + } + } + + /* Now show the gss_iov and xdr_iov */ + show_gss_xdr_iov(gss_iov, gv_count, xdr_iov, xv_count, + "after gss_...length"); + + /* At this point: + * The xdr_iov DATA buffers are completely filled in. + * The xdr_iov HEADER and TRAILER buffers have type and length + * filled in. + */ + + /* Now actually allocate the HEADER, PADDING, and TRAILER. + * The cursor position will be updated to the end of the + * TRAILER. + */ + xdr_stat = XDR_ALLOCHDRS(xdrs, start + 4, xdr_iov, xv_count); + + if (!xdr_stat) + goto out; + + /* Now show the gss_iov and xdr_iov */ + show_gss_xdr_iov(gss_iov, gv_count, xdr_iov, xv_count, + "after XDR_ALLOCHDRS"); + + /* At this point the xdr_iov is completely filled in. */ + + if (svc == RPCSEC_GSS_SVC_INTEGRITY) { + /* Copy the TRAILER buffer into the gss_iov (remember + * it's AFTER the VIO_TRAILER_LEN buffer). 
+ */ + gss_iov[after_data].buffer.value = + xdr_iov[after_data + 1].vio_head; + + /* At this point the gss_iov is completely filled in */ + + /* Now show the gss_iov and xdr_iov */ + show_gss_xdr_iov(gss_iov, gv_count, xdr_iov, xv_count, + "just before gss_get_mic_iov"); + + /* Now call gss_get_mic_iov */ + maj_stat = gss_get_mic_iov(&min_stat, ctx, qop, + gss_iov, gv_count); + + if (maj_stat != GSS_S_COMPLETE) { + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "%s() gss_get_mic_iov failed", + __func__); + gss_log_error("gss_get_mic_iov", + maj_stat, min_stat); + xdr_stat = FALSE; + goto out; + } + } else { + /* Copy the HEADER buffer into the gss_iov */ + gss_iov[0].buffer.value = xdr_iov[0].vio_head; + + /* Copy the PADDING buffer into the gss_iov */ + gss_iov[after_data].buffer.value = + xdr_iov[after_data].vio_head; + + /* Copy the TRAILER buffer into the gss_iov */ + gss_iov[after_data + 1].buffer.value = + xdr_iov[after_data + 1].vio_head; + + /* At this point the gss_iov is completely filled in */ + + /* Now show the gss_iov and xdr_iov */ + show_gss_xdr_iov(gss_iov, gv_count, xdr_iov, xv_count, + "just before gss_wrap_iov"); + + /* Now call gss_wrap_iov */ + maj_stat = gss_wrap_iov(&min_stat, ctx, true, + GSS_C_QOP_DEFAULT, NULL, + gss_iov, gv_count); + + if (maj_stat != GSS_S_COMPLETE) { + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "%s() gss_wrap_iov failed", + __func__); + gss_log_error("gss_wrap_iov", + maj_stat, min_stat); + xdr_stat = FALSE; + goto out; + } + } + + /* At this point, the xdr_iov now has all the GSS data in it + * and wrapping is complete. Now we need to go back and write + * the length back at start. + */ + + goto out; + } /* else fall through to legacy single buffer implementation */ + + /* Initialize the static buffers */ + memset(&databuf, 0, sizeof(databuf)); + memset(&wrapbuf, 0, sizeof(wrapbuf)); /* Set databuf to marshalled rpc_gss_data_t. 
*/ - databuflen = end - start - 4; if (!XDR_SETPOS(xdrs, start+4)) { __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, "%s() XDR_SETPOS #1 failed", @@ -211,140 +621,415 @@ xdr_rpc_gss_wrap(XDR *xdrs, xdrproc_t xdr_func, void *xdr_ptr, return (FALSE); } - xdr_stat = FALSE; + /* We only need the legacy implementation for RPCSEC_GSS_SVC_PRIVACY */ - if (svc == RPCSEC_GSS_SVC_INTEGRITY) { - /* Marshal databody_integ length. */ - if (!XDR_SETPOS(xdrs, start)) { - __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, - "%s() XDR_SETPOS #2 failed", - __func__); - return (FALSE); - } - if (!XDR_PUTUINT32(xdrs, databuflen)) - return (FALSE); + /* Encrypt rpc_gss_data_t. */ + maj_stat = gss_wrap(&min_stat, ctx, TRUE, qop, &databuf, &conf_state, + &wrapbuf); - /* Checksum rpc_gss_data_t. */ - maj_stat = gss_get_mic(&min_stat, ctx, qop, &databuf, &wrapbuf); - if (maj_stat != GSS_S_COMPLETE) { - __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, - "%s() gss_get_mic failed", - __func__); - return (FALSE); - } - /* Marshal checksum. */ - if (!XDR_SETPOS(xdrs, end)) { - __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, - "%s() XDR_SETPOS #3 failed", - __func__); - gss_release_buffer(&min_stat, &wrapbuf); - return (FALSE); - } - maxwrapsz = (u_int) (wrapbuf.length + RPC_SLACK_SPACE); - xdr_stat = xdr_rpc_gss_encode(xdrs, &wrapbuf, maxwrapsz); - gss_release_buffer(&min_stat, &wrapbuf); - } else if (svc == RPCSEC_GSS_SVC_PRIVACY) { - /* Encrypt rpc_gss_data_t. */ - maj_stat = - gss_wrap(&min_stat, ctx, TRUE, qop, &databuf, &conf_state, - &wrapbuf); - if (maj_stat != GSS_S_COMPLETE) { - gss_log_status("gss_wrap", maj_stat, min_stat); - return (FALSE); - } - /* Marshal databody_priv. 
*/ - if (!XDR_SETPOS(xdrs, start)) { - __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, - "%s() XDR_SETPOS #4 failed", - __func__); - gss_release_buffer(&min_stat, &wrapbuf); - return (FALSE); - } - maxwrapsz = (u_int) (wrapbuf.length + RPC_SLACK_SPACE); - xdr_stat = xdr_rpc_gss_encode(xdrs, &wrapbuf, maxwrapsz); + if (maj_stat != GSS_S_COMPLETE) { + gss_log_status("gss_wrap", maj_stat, min_stat); + return (FALSE); + } + + /* Marshal databody_priv. */ + if (!XDR_SETPOS(xdrs, start)) { + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "%s() XDR_SETPOS #4 failed", + __func__); gss_release_buffer(&min_stat, &wrapbuf); + return (FALSE); } + + maxwrapsz = (u_int) (wrapbuf.length + RPC_SLACK_SPACE); + xdr_stat = xdr_rpc_gss_encode(xdrs, &wrapbuf, maxwrapsz); + gss_release_buffer(&min_stat, &wrapbuf); + if (!xdr_stat) { __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, "%s() failed", __func__); } + +out: + + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "check free gss_iov=%p size %llu", + gss_iov, (unsigned long long) gvsize); + + if (unlikely(gvsize > MAXALLOCA)) { + mem_free(gss_iov, gvsize); + } + + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "check free gss_iov=%p size %llu", + xdr_iov, (unsigned long long) xvsize); + + if (unlikely(xvsize > MAXALLOCA)) { + mem_free(xdr_iov, xvsize); + } + return (xdr_stat); } bool xdr_rpc_gss_unwrap(XDR *xdrs, xdrproc_t xdr_func, void *xdr_ptr, gss_ctx_id_t ctx, gss_qop_t qop, rpc_gss_svc_t svc, - u_int seq) + u_int seq, checksum_func_t checksum_func, void *priv) { - XDR tmpxdrs; - gss_buffer_desc databuf, wrapbuf; + XDR tmpxdrs, *usexdrs = xdrs; OM_uint32 maj_stat, min_stat; - u_int qop_state; - int conf_state; - uint32_t seq_num; + u_int qop_state, data_start, token_start, buffer_len = 0; + int conf_state, iov_count, token_iov_count, i; + uint32_t seq_num, xvsize = 0, gvsize = 0, data_len, token_len; bool xdr_stat; + gss_iov_buffer_desc *gss_iov = NULL; + xdr_vio *xdr_iov = NULL; + char *buffer = NULL; + + if (svc != RPCSEC_GSS_SVC_PRIVACY && + svc != 
RPCSEC_GSS_SVC_INTEGRITY) { + /* For some reason we got here with not supported type. */ + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "%s() svc != RPCSEC_GSS_SVC_PRIVACY or RPCSEC_GSS_SVC_INTEGRITY", + __func__); + return (FALSE); + } if (xdr_func == (xdrproc_t) xdr_void || xdr_ptr == NULL) return (TRUE); - memset(&databuf, 0, sizeof(databuf)); - memset(&wrapbuf, 0, sizeof(wrapbuf)); - if (svc == RPCSEC_GSS_SVC_INTEGRITY) { - /* Decode databody_integ. */ - if (!xdr_rpc_gss_decode(xdrs, &databuf)) { + /* + * first deal with the data length since xdr bytes are counted + */ + if (!XDR_GETUINT32(xdrs, &data_len)) { + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "%s:%u ERROR size", + __func__, __LINE__); + return (FALSE); + } + data_start = XDR_GETPOS(xdrs); + iov_count = XDR_IOVCOUNT(xdrs, data_start, data_len); + if (iov_count < 0) { __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, - "%s() xdr_rpc_gss_decode databody_integ failed", + "%s() XDR_IOVCOUNT signed data failed", __func__); return (FALSE); } - /* Decode checksum. */ - if (!xdr_rpc_gss_decode(xdrs, &wrapbuf)) { - gss_release_buffer(&min_stat, &databuf); + if (!XDR_SETPOS(xdrs, data_start + data_len)) { __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, - "%s() xdr_rpc_gss_decode checksum failed", + "%s() XDR_SETPOS failed", __func__); return (FALSE); } - /* Verify checksum and QOP. */ - maj_stat = - gss_verify_mic(&min_stat, ctx, &databuf, &wrapbuf, - &qop_state); - gss_release_buffer(&min_stat, &wrapbuf); + if (!XDR_GETUINT32(xdrs, &token_len)) { + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "%s:%u ERROR size", + __func__, __LINE__); + return (FALSE); + } + token_start = XDR_GETPOS(xdrs); + token_iov_count = XDR_IOVCOUNT(xdrs, token_start, token_len); + if (token_iov_count < 0) { + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "%s() XDR_IOVCOUNT MIC token failed", + __func__); + return (FALSE); + } + + /* Determine the size of the gss_iov and xdr_iov. 
*/ + gvsize = (iov_count + 1) * sizeof(gss_iov_buffer_desc); + xvsize = (iov_count + 1) * sizeof(xdr_vio); + + /* Allocate the gss_iov */ + if (unlikely(gvsize > MAXALLOCA)) { + gss_iov = mem_alloc(gvsize); + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "mem_alloc gss_iov=%p size %llu count %d", + gss_iov, (unsigned long long) gvsize, + iov_count); + } else { + gss_iov = alloca(gvsize); + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "alloca gss_iov=%p size %llu count %d", + gss_iov, (unsigned long long) gvsize, + iov_count); + } + + /* Allocate the xdr_iov */ + if (unlikely(xvsize > MAXALLOCA)) { + xdr_iov = mem_alloc(xvsize); + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "mem_alloc xdr_iov=%p size %llu count %d", + xdr_iov, (unsigned long long) xvsize, + iov_count); + } else { + xdr_iov = alloca(xvsize); + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "alloca xdr_iov=%p size %llu count %d", + xdr_iov, (unsigned long long) xvsize, + iov_count); + } + + memset(gss_iov, 0, gvsize); + memset(xdr_iov, 0, xvsize); + + if (token_iov_count != 1) { + /* We need to allocate a token buffer */ + buffer = mem_alloc(token_len); + buffer_len = token_len; + gss_iov[iov_count].type = GSS_IOV_BUFFER_TYPE_MIC_TOKEN; + gss_iov[iov_count].buffer.length = token_len; + gss_iov[iov_count].buffer.value = buffer; + + /* Now extract the MIC token into the buffer */ + if (!xdr_opaque_decode(xdrs, buffer, token_len)) { + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "%s() xdr_opaque_decode MIC failed", + __func__); + xdr_stat = FALSE; + goto out; + } + } else { + if (!XDR_FILLBUFS(xdrs, token_start, + &xdr_iov[iov_count], token_len)) { + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "%s() XDR_FILLBUFS MIC token failed", + __func__); + xdr_stat = FALSE; + goto out; + } + gss_iov[iov_count].type = GSS_IOV_BUFFER_TYPE_MIC_TOKEN; + gss_iov[iov_count].buffer.length = token_len; + gss_iov[iov_count].buffer.value = + xdr_iov[iov_count].vio_head; + /* Consume the MIC token */ + if (!XDR_SETPOS(xdrs, token_start + token_len)) { + 
__warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "%s() XDR_SETPOS failed %lu %lu %lu", + __func__, + (unsigned long) token_start, + (unsigned long) token_len, + (unsigned long) token_start + + token_len); + xdr_stat = FALSE; + goto out; + } + } + + /* Now show the gss_iov and xdr_iov */ + show_gss_xdr_iov(gss_iov, iov_count + 1, xdr_iov, iov_count + 1, + "just before XDR_FILLBUFS for data buffers"); + + /* Now fill in the data buffers + * DATA buffers are completely filled (vio_base, vio_head, + * vio_tail, vio_wrap, vio_length, and vio_type) on exit. + */ + if (!XDR_FILLBUFS(xdrs, data_start, &xdr_iov[0], data_len)) { + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "%s() XDR_FILLBUFS integrity data failed", + __func__); + xdr_stat = FALSE; + goto out; + } + + /* Now show the gss_iov and xdr_iov */ + show_gss_xdr_iov(gss_iov, iov_count + 1, xdr_iov, iov_count + 1, + "just after XDR_FILLBUFS for data buffers"); + + /* Now set up the gss_iov */ + for (i = 0; i < iov_count; i++) { + /* Copy over a DATA buffer */ + gss_iov[i].type = GSS_IOV_BUFFER_TYPE_DATA; + gss_iov[i].buffer.length = xdr_iov[i].vio_length; + gss_iov[i].buffer.value = xdr_iov[i].vio_head; + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "buf %d type %d length %d value %p", + i, gss_iov[i].type, gss_iov[i].buffer.length, + gss_iov[i].buffer.value); + } + + /* Now show the gss_iov and xdr_iov */ + show_gss_xdr_iov(gss_iov, iov_count + 1, xdr_iov, iov_count + 1, + "just before gss_verify_mic_iov"); + + maj_stat = gss_verify_mic_iov(&min_stat, ctx, &qop_state, + gss_iov, iov_count + 1); if (maj_stat != GSS_S_COMPLETE || qop_state != qop) { - gss_release_buffer(&min_stat, &databuf); - gss_log_status("gss_verify_mic", maj_stat, min_stat); - return (FALSE); + gss_log_error("gss_verify_mic_iov", + maj_stat, min_stat); + xdr_stat = FALSE; + goto out; } - } else if (svc == RPCSEC_GSS_SVC_PRIVACY) { - /* Decode databody_priv. */ - if (!xdr_rpc_gss_decode(xdrs, &wrapbuf)) { + + /* Now we have verified. 
The data is still in place so we can + * decode the actual request from the original xdrs, so position + * to data_start so decode can begin. + */ + if (!XDR_SETPOS(xdrs, data_start)) { __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, - "%s() xdr_rpc_gss_decode databody_priv failed", + "%s() XDR_SETPOS to veriefied data start failed", __func__); + xdr_stat = FALSE; + goto out; + } + } else if (svc == RPCSEC_GSS_SVC_PRIVACY) { + /* + * first deal with the token length since xdr bytes are counted + * token_start and token_len refer to the entire wrapped package + */ + if (!XDR_GETUINT32(xdrs, &token_len)) { + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "%s:%u ERROR size", + __func__, __LINE__); return (FALSE); } - /* Decrypt databody. */ - maj_stat = - gss_unwrap(&min_stat, ctx, &wrapbuf, &databuf, &conf_state, - &qop_state); - gss_release_buffer(&min_stat, &wrapbuf); + token_start = XDR_GETPOS(xdrs); + iov_count = XDR_IOVCOUNT(xdrs, token_start, token_len); - /* Verify encryption and QOP. */ - if (maj_stat != GSS_S_COMPLETE || qop_state != qop - || conf_state != TRUE) { - gss_release_buffer(&min_stat, &databuf); - gss_log_status("gss_unwrap", maj_stat, min_stat); + if (iov_count < 0) { + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "%s() XDR_IOVCOUNT privacy data failed", + __func__); return (FALSE); } + + /* Determine the size of the gss_iov and xdr_iov. + * NOTE: we only need a single xdr_iov buffer. 
+ * The gss_iov will always be 2: STREAM, DATA + */ + gvsize = 2 * sizeof(gss_iov_buffer_desc); + xvsize = sizeof(xdr_vio); + + /* Allocate the gss_iov */ + if (unlikely(gvsize > MAXALLOCA)) { + gss_iov = mem_alloc(gvsize); + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "mem_alloc gss_iov=%p size %llu count %d", + gss_iov, (unsigned long long) gvsize, + iov_count); + } else { + gss_iov = alloca(gvsize); + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "alloca gss_iov=%p size %llu count %d", + gss_iov, (unsigned long long) gvsize, + iov_count); + } + + /* Allocate the xdr_iov */ + if (unlikely(xvsize > MAXALLOCA)) { + xdr_iov = mem_alloc(xvsize); + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "mem_alloc xdr_iov=%p size %llu count %d", + xdr_iov, (unsigned long long) xvsize, + iov_count); + } else { + xdr_iov = alloca(xvsize); + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "alloca xdr_iov=%p size %llu count %d", + xdr_iov, (unsigned long long) xvsize, + iov_count); + } + + memset(gss_iov, 0, gvsize); + memset(xdr_iov, 0, xvsize); + + gss_iov[0].type = GSS_IOV_BUFFER_TYPE_STREAM; + gss_iov[1].type = GSS_IOV_BUFFER_TYPE_DATA; + + if (iov_count == 1) { + /* We can unwrap in place */ + if (!XDR_FILLBUFS(xdrs, token_start, + &xdr_iov[0], token_len)) { + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "%s() XDR_FILLBUFS wrap token failed", + __func__); + xdr_stat = FALSE; + goto out; + } + + gss_iov[0].buffer.length = xdr_iov[0].vio_length; + gss_iov[0].buffer.value = xdr_iov[0].vio_head; + } else { + /* We need to extract into a single buffer to unwrap */ + buffer = mem_alloc(token_len); + buffer_len = token_len; + gss_iov[0].buffer.length = token_len; + gss_iov[0].buffer.value = buffer; + + /* Now extract the wrap token into the buffer */ + if (!xdr_opaque_decode(xdrs, buffer, token_len)) { + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "%s() xdr_opaque_decode wrap token failed", + __func__); + xdr_stat = FALSE; + goto out; + } + } + + /* Now show the gss_iov and xdr_iov */ + show_gss_xdr_iov(gss_iov, 2, 
xdr_iov, 1, + "just before gss_unwrap_iov"); + + /* Now we have the wrap token in the STREAM buffer */ + maj_stat = gss_unwrap_iov(&min_stat, ctx, &conf_state, + &qop_state, gss_iov, 2); + + if (maj_stat != GSS_S_COMPLETE || qop_state != qop) { + gss_log_error("gss_unwrap_iov", maj_stat, min_stat); + xdr_stat = FALSE; + goto out; + } + + /* Now show the gss_iov and xdr_iov */ + show_gss_xdr_iov(gss_iov, 2, xdr_iov, 1, + "just after gss_unwrap_iov"); + + if (iov_count == 1) { + /* We can decode in place, find the data_start by + * determining the offset within the STREAM + * that gss_unwrap_iov indicated via the DATA buffer + * pointer. + */ + data_start = token_start + + gss_iov[1].buffer.value - + gss_iov[0].buffer.value; + data_len = gss_iov[1].buffer.length; + + if (!XDR_SETPOS(xdrs, data_start)) { + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "%s() XDR_SETPOS to veriefied data start failed", + __func__); + xdr_stat = FALSE; + goto out; + } + } else { + /* We need to create an xdrmem from the DATA buffer */ + xdrmem_create(&tmpxdrs, gss_iov[1].buffer.value, + gss_iov[1].buffer.length, XDR_DECODE); + usexdrs = &tmpxdrs; + } } + + /* At this point, usexdrs has been set either to the original xdrs + * up front, or due to the need to unwrap a multi-buffer token, has + * been set to &tmpxdrs. + */ + + /* If checksum is requested perform it. */ + if (checksum_func != NULL) { + checksum_func(priv, usexdrs->x_data, xdr_size_inline(usexdrs)); + } + /* Decode rpc_gss_data_t (sequence number + arguments). */ - xdrmem_create(&tmpxdrs, databuf.value, databuf.length, XDR_DECODE); - xdr_stat = (XDR_GETUINT32(&tmpxdrs, &seq_num) - && (*xdr_func) (&tmpxdrs, xdr_ptr)); - XDR_DESTROY(&tmpxdrs); - gss_release_buffer(&min_stat, &databuf); + xdr_stat = (XDR_GETUINT32(usexdrs, &seq_num) + && (*xdr_func) (usexdrs, xdr_ptr)); + + if (usexdrs == &tmpxdrs) { + /* If it's the tmpxdrs, then destroy the xdrmem we created. */ + XDR_DESTROY(&tmpxdrs); + } /* Verify sequence number. 
*/ if (xdr_stat == TRUE && seq_num != seq) { @@ -353,6 +1038,29 @@ xdr_rpc_gss_unwrap(XDR *xdrs, xdrproc_t xdr_func, void *xdr_ptr, __func__); return (FALSE); } + +out: + + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "check free gss_iov=%p size %llu", + gss_iov, (unsigned long long) gvsize); + + if (unlikely(gvsize > MAXALLOCA)) { + mem_free(gss_iov, gvsize); + } + + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, + "check free gss_iov=%p size %llu", + xdr_iov, (unsigned long long) xvsize); + + if (unlikely(xvsize > MAXALLOCA)) { + mem_free(xdr_iov, xvsize); + } + + if (buffer != NULL) { + mem_free(buffer, buffer_len); + } + return (xdr_stat); } diff --git a/src/clnt_dg.c b/src/clnt_dg.c index 2610046bcc..6e024b6416 100644 --- a/src/clnt_dg.c +++ b/src/clnt_dg.c @@ -332,7 +332,7 @@ clnt_dg_control(CLIENT *clnt, u_int request, void *info) break; case CLSET_SVC_ADDR: /* set to new address */ addr = (struct netbuf *)info; - if (addr->len < sizeof(cu->cu_raddr)) { + if (addr->len > sizeof(cu->cu_raddr)) { rslt = false; break; diff --git a/src/clnt_generic.c b/src/clnt_generic.c index aa5ca21902..4e43f7a9b1 100644 --- a/src/clnt_generic.c +++ b/src/clnt_generic.c @@ -401,6 +401,11 @@ clnt_tli_ncreate(int fd, const struct netconfig *nconf, goto err; } + if (flags & CLNT_CREATE_FLAG_CLOSE) { + /* We got a new FD; this makes it a local client */ + cl->cl_flags |= CLNT_FLAG_LOCAL; + } + if (nconf) { cl->cl_netid = mem_strdup(nconf->nc_netid); cl->cl_tp = mem_strdup(nconf->nc_device); @@ -639,7 +644,7 @@ clnt_req_wait_reply(struct clnt_req *cc) } (void)clock_gettime(CLOCK_REALTIME_FAST, &ts); - timespecadd(&ts, &cc->cc_timeout); + timespecadd(&ts, &cc->cc_timeout, &ts); code = cond_timedwait(&cc->cc_we.cv, &cc->cc_we.mtx, &ts); __warnx(TIRPC_DEBUG_FLAG_CLNT_REQ, diff --git a/src/clnt_perror.c b/src/clnt_perror.c index 545efe8f90..188e9ced3a 100644 --- a/src/clnt_perror.c +++ b/src/clnt_perror.c @@ -46,6 +46,8 @@ #include #include +#include "strl.h" + static char *auth_errmsg(enum 
auth_stat); /* @@ -72,7 +74,7 @@ rpc_sperror(const struct rpc_err *e, const char *s) len -= i; } - (void)strncpy(str, clnt_sperrno(e->re_status), len - 1); + (void)strlcpy(str, clnt_sperrno(e->re_status), len); i = strlen(str); str += i; len -= i; diff --git a/src/clnt_vc.c b/src/clnt_vc.c index 3033a74b8e..e680b5532b 100644 --- a/src/clnt_vc.c +++ b/src/clnt_vc.c @@ -460,6 +460,11 @@ clnt_vc_destroy(CLIENT *clnt) if (cx->cx_rec) { SVC_RELEASE(&cx->cx_rec->xprt, SVC_RELEASE_FLAG_NONE); + + if (clnt->cl_flags & CLNT_FLAG_LOCAL) { + /* Local client; destroy the xprt */ + SVC_DESTROY(&cx->cx_rec->xprt); + } } clnt_vc_data_free(CT_DATA(cx)); } diff --git a/src/libntirpc.map.in.cmake b/src/libntirpc.map.in.cmake index 2e465df66d..b6ab0d9f2c 100644 --- a/src/libntirpc.map.in.cmake +++ b/src/libntirpc.map.in.cmake @@ -1,4 +1,4 @@ -NTIRPC_${NTIRPC_VERSION} { +NTIRPC_${NTIRPC_VERSION_BASE} { global: # __* __ntirpc_pkg_params; @@ -135,6 +135,7 @@ NTIRPC_${NTIRPC_VERSION} { svc_ncreate; svc_raw_ncreate; svc_reg; + svc_resume; svc_rqst_new_evchan; svc_rqst_evchan_reg; svc_rqst_evchan_unreg; diff --git a/src/lttng/CMakeLists.txt b/src/lttng/CMakeLists.txt index d621a58473..b529ef4c75 100644 --- a/src/lttng/CMakeLists.txt +++ b/src/lttng/CMakeLists.txt @@ -16,7 +16,7 @@ target_link_libraries(ntirpc_tracepoints set_target_properties(ntirpc_tracepoints PROPERTIES VERSION ${NTIRPC_VERSION} - SOVERSION "${NTIRPC_MAJOR_VERSION}.${NTIRPC_MINOR_VERSION}" + SOVERSION "${NTIRPC_MAJOR_VERSION}${NTIRPC_MINOR_VERSION}" ) install(TARGETS ntirpc_tracepoints COMPONENT tracing DESTINATION ${LIB_INSTALL_DIR} ) diff --git a/src/netnamer.c b/src/netnamer.c index 0b14b33647..6e70883d36 100644 --- a/src/netnamer.c +++ b/src/netnamer.c @@ -48,6 +48,8 @@ #include #include +#include "strl.h" + static char *OPSYS = "unix"; static char *NETID = "netid.byname"; static char *NETIDFILE = "/etc/netid"; @@ -190,7 +192,7 @@ int netname2host(char netname[MAXNETNAMELEN + 1], char *hostname, int hostlen) if 
(getnetid(netname, valbuf)) { val = valbuf; if ((*val == '0') && (val[1] == ':')) { - (void)strncpy(hostname, val + 2, hostlen); + (void)strlcpy(hostname, val + 2, hostlen); return (1); } } diff --git a/src/rpc_dplx_internal.h b/src/rpc_dplx_internal.h index 5c6a4dc346..ef3dd2985b 100644 --- a/src/rpc_dplx_internal.h +++ b/src/rpc_dplx_internal.h @@ -32,6 +32,7 @@ #include #include #include +#include /* Svc event strategy */ enum svc_event_type { @@ -47,10 +48,13 @@ typedef struct rpc_dplx_lock { } locktrace; } rpc_dplx_lock_t; +struct svc_rqst_rec; + /* new unified state */ struct rpc_dplx_rec { struct svc_xprt xprt; /**< Transport Independent handle */ struct xdr_ioq ioq; + struct poolq_head writeq; /**< poolq for write requests */ struct opr_rbtree call_replies; struct opr_rbtree_node fd_node; struct { @@ -64,11 +68,13 @@ struct rpc_dplx_rec { union { #if defined(TIRPC_EPOLL) struct { - struct epoll_event event; + struct epoll_event event_recv; + struct epoll_event event_send; + struct xdr_ioq *xioq_send; } epoll; #endif } ev_u; - void *ev_p; /* struct svc_rqst_rec (internal) */ + struct svc_rqst_rec *ev_p; /* struct svc_rqst_rec (internal) */ size_t maxrec; long pagesz; @@ -76,6 +82,7 @@ struct rpc_dplx_rec { u_int sendsz; uint32_t call_xid; /**< current call xid */ uint32_t ev_count; /**< atomic count of waiting events */ + struct svc_req *svc_req; /**< svc_req we are processing */ }; #define REC_XPRT(p) (opr_containerof((p), struct rpc_dplx_rec, xprt)) @@ -83,14 +90,6 @@ struct rpc_dplx_rec { #define RPC_DPLX_LOCKED 0x00100000 #define RPC_DPLX_UNLOCK 0x00200000 -#ifndef HAVE_STRLCAT -extern size_t strlcat(char *, const char *, size_t); -#endif - -#ifndef HAVE_STRLCPY -extern size_t strlcpy(char *, const char *src, size_t); -#endif - /* in clnt_generic.c */ enum xprt_stat clnt_req_process_reply(SVCXPRT *, struct svc_req *); int clnt_req_xid_cmpf(const struct opr_rbtree_node *lhs, @@ -116,6 +115,9 @@ rpc_dplx_rec_init(struct rpc_dplx_rec *rec) 
rpc_dplx_lock_init(&rec->recv.lock); opr_rbtree_init(&rec->call_replies, clnt_req_xid_cmpf); mutex_init(&rec->xprt.xp_lock, NULL); + TAILQ_INIT(&rec->writeq.qh); + mutex_init(&rec->writeq.qmutex, NULL); + rec->writeq.qcount = 0; /* Stop this xprt being cleaned immediately */ (void)clock_gettime(CLOCK_MONOTONIC_FAST, &(rec->recv.ts)); @@ -127,6 +129,7 @@ rpc_dplx_rec_destroy(struct rpc_dplx_rec *rec) { rpc_dplx_lock_destroy(&rec->recv.lock); mutex_destroy(&rec->xprt.xp_lock); + mutex_destroy(&rec->writeq.qmutex); #if defined(HAVE_BLKIN) if (rec->xprt.blkin.svc_name) diff --git a/src/rpc_generic.c b/src/rpc_generic.c index bf69aa7b89..0b237f3308 100644 --- a/src/rpc_generic.c +++ b/src/rpc_generic.c @@ -63,6 +63,7 @@ #include #include "rpc_com.h" +#include "strl.h" void thr_keyfree(void *k) @@ -787,12 +788,16 @@ __rpc_taddr2uaddr_af(int af, const struct netbuf *nbuf) switch (af) { case AF_INET: - if (nbuf->len < sizeof(*sin)) + if (nbuf->len < sizeof(*sin)) { + mem_free(ret, RETURN_SIZE); return NULL; + } sin = nbuf->buf; if (inet_ntop(af, &sin->sin_addr, namebuf, sizeof(namebuf)) - == NULL) + == NULL) { + mem_free(ret, RETURN_SIZE); return NULL; + } port = ntohs(sin->sin_port); if (sprintf (ret, "%s.%u.%u", namebuf, ((u_int32_t) port) >> 8, @@ -803,8 +808,10 @@ __rpc_taddr2uaddr_af(int af, const struct netbuf *nbuf) break; #ifdef INET6 case AF_INET6: - if (nbuf->len < sizeof(*sin6)) + if (nbuf->len < sizeof(*sin6)) { + mem_free(ret, RETURN_SIZE); return NULL; + } sin6 = nbuf->buf; if (inet_ntop(af, &sin6->sin6_addr, namebuf6, sizeof(namebuf6)) == NULL) { @@ -957,7 +964,7 @@ __rpc_uaddr2taddr_af(int af, const char *uaddr) sun = (struct sockaddr_un *)mem_zalloc(sizeof(*sun)); sun->sun_family = AF_LOCAL; - strncpy(sun->sun_path, addrstr, sizeof(sun->sun_path) - 1); + strlcpy(sun->sun_path, addrstr, sizeof(sun->sun_path)); ret->len = SUN_LEN(sun); ret->maxlen = sizeof(struct sockaddr_un); ret->buf = sun; diff --git a/src/rpc_rdma.c b/src/rpc_rdma.c index 
9cd8203b22..3e9f458eb3 100644 --- a/src/rpc_rdma.c +++ b/src/rpc_rdma.c @@ -51,6 +51,7 @@ #include //fcntl #include //fcntl #include +#include #define EPOLL_SIZE (10) /*^ expected number of fd, must be > 0 */ @@ -571,6 +572,7 @@ rpc_rdma_stats_thread(void *arg) int n; int rc; + rcu_register_thread(); while (rpc_rdma_state.run_count > 0) { n = epoll_wait(rpc_rdma_state.stats_epollfd, epoll_events, EPOLL_EVENTS, EPOLL_WAIT_MS); @@ -634,7 +636,7 @@ rpc_rdma_stats_thread(void *arg) rc = close(childfd); } } - + rcu_unregister_thread(); pthread_exit(NULL); } @@ -836,6 +838,7 @@ rpc_rdma_cq_thread(void *arg) int n; int rc; + rcu_register_thread(); while (rpc_rdma_state.run_count > 0) { n = epoll_wait(rpc_rdma_state.cq_epollfd, epoll_events, EPOLL_EVENTS, EPOLL_WAIT_MS); @@ -895,7 +898,7 @@ rpc_rdma_cq_thread(void *arg) mutex_unlock(&xprt->cm_lock); } } - + rcu_unregister_thread(); pthread_exit(NULL); } @@ -1091,6 +1094,7 @@ rpc_rdma_cm_thread(void *nullarg) int n; int rc; + rcu_register_thread(); while (rpc_rdma_state.run_count > 0) { n = epoll_wait(rpc_rdma_state.cm_epollfd, epoll_events, EPOLL_EVENTS, EPOLL_WAIT_MS); @@ -1166,7 +1170,7 @@ rpc_rdma_cm_thread(void *nullarg) SVC_DESTROY(&cm_xprt->sm_dr.xprt); } } - + rcu_unregister_thread(); pthread_exit(NULL); } diff --git a/src/rpcb_clnt.c b/src/rpcb_clnt.c index 4bf69e7480..cf77b7a1da 100644 --- a/src/rpcb_clnt.c +++ b/src/rpcb_clnt.c @@ -58,6 +58,7 @@ #include #include "rpc_com.h" +#include "strl.h" /* retry timeout default to the moon and back */ static struct timespec to = { 3, 0 }; @@ -313,7 +314,15 @@ static CLIENT *getclnthandle(const char *host, const struct netconfig *nconf, mem_free(addr_to_delete.buf, addr_to_delete.len); } if (!__rpc_nconf2sockinfo(nconf, &si)) { - assert(client == NULL); + if (client != NULL) { + /* if client!=NULL then there should + * have been a failure + */ + assert(CLNT_FAILURE(client)); + /* destroy the failed client */ + CLNT_DESTROY(client); + } + __warnx(TIRPC_DEBUG_FLAG_WARN, "%s: 
%s", __func__, clnt_sperrno(RPC_UNKNOWNPROTO)); client = clnt_raw_ncreate(1, 1); @@ -338,7 +347,7 @@ static CLIENT *getclnthandle(const char *host, const struct netconfig *nconf, if (targaddr) { *targaddr = mem_zalloc(sizeof(sun.sun_path)); - strncpy(*targaddr, _PATH_RPCBINDSOCK, + strlcpy(*targaddr, _PATH_RPCBINDSOCK, sizeof(sun.sun_path)); } return (client); @@ -346,7 +355,15 @@ static CLIENT *getclnthandle(const char *host, const struct netconfig *nconf, goto out_err; } else { if (getaddrinfo(host, "sunrpc", &hints, &res) != 0) { - assert(client == NULL); + if (client != NULL) { + /* if client!=NULL then there should + * have been a failure + */ + assert(CLNT_FAILURE(client)); + /* destroy the failed client */ + CLNT_DESTROY(client); + } + __warnx(TIRPC_DEBUG_FLAG_WARN, "%s: %s", __func__, clnt_sperrno(RPC_UNKNOWNHOST)); client = clnt_raw_ncreate(1, 1); @@ -598,7 +615,7 @@ __rpcbind_is_up(void) if (sock < 0) return (false); sun.sun_family = AF_LOCAL; - strncpy(sun.sun_path, _PATH_RPCBINDSOCK, sizeof(sun.sun_path)); + strlcpy(sun.sun_path, _PATH_RPCBINDSOCK, sizeof(sun.sun_path)); if (connect(sock, (struct sockaddr *)&sun, sizeof(sun)) < 0) { close(sock); @@ -678,21 +695,8 @@ __rpcb_findaddr_timed(rpcprog_t program, rpcvers_t version, rpcvers_t pmapvers = 2; uint16_t port = 0; - /* - * Try UDP only - there are some portmappers out - * there that use UDP only. 
- */ if (strcmp(nconf->nc_proto, NC_TCP) == 0) { - struct netconfig *newnconf; - - newnconf = getnetconfigent("udp"); - if (!newnconf) { - client = clnt_raw_ncreate(program, version); - client->cl_error.re_status = RPC_UNKNOWNPROTO; - goto error; - } - client = getclnthandle(host, newnconf, &parms.r_addr); - freenetconfigent(newnconf); + client = getclnthandle(host, nconf, &parms.r_addr); } else if (strcmp(nconf->nc_proto, NC_UDP) == 0) client = getclnthandle(host, nconf, &parms.r_addr); else @@ -1311,6 +1315,8 @@ static CLIENT *local_rpcb(const char *tag) CLNT_CREATE_FLAG_CONNECT); if (CLNT_SUCCESS(client)) { + /* This is a local client (we created the fd above) */ + client->cl_flags |= CLNT_FLAG_LOCAL; return client; } t = rpc_sperror(&client->cl_error, tag); diff --git a/src/strl.h b/src/strl.h new file mode 100644 index 0000000000..5d2f3b4455 --- /dev/null +++ b/src/strl.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Red Hat, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR `AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef STRL_H +#define STRL_H + +#include "config.h" + +#ifndef HAVE_STRLCAT +extern size_t strlcat(char *, const char *, size_t); +#endif + +#ifndef HAVE_STRLCPY +extern size_t strlcpy(char *, const char *src, size_t); +#endif + + +#endif /* STRL_H */ diff --git a/src/strlcpy.c b/src/strlcpy.c index 029a78d92b..2038a086d6 100644 --- a/src/strlcpy.c +++ b/src/strlcpy.c @@ -14,10 +14,11 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#ifndef HAVE_STRLCPY #include "config.h" +#ifndef HAVE_STRLCPY #include +#include "strl.h" /* * Copy src to string dst of size siz. At most siz-1 characters diff --git a/src/svc.c b/src/svc.c index c75c048a1d..faf3dfec40 100644 --- a/src/svc.c +++ b/src/svc.c @@ -139,7 +139,8 @@ svc_init(svc_init_params *params) return true; } __svc_params->disconnect_cb = params->disconnect_cb; - __svc_params->request_cb = params->request_cb; + __svc_params->alloc_cb = params->alloc_cb; + __svc_params->free_cb = params->free_cb; __svc_params->max_connections = (params->max_connections) ? 
params->max_connections : FD_SETSIZE; @@ -188,7 +189,6 @@ svc_init(svc_init_params *params) if (work_pool_params.thrd_max < work_pool_params.thrd_min) work_pool_params.thrd_max = work_pool_params.thrd_min; - svc_ioq_init(); if (work_pool_init(&svc_work_pool, "svc_", &work_pool_params)) { mutex_unlock(&__svc_params->mtx); return false; diff --git a/src/svc_auth_gss.c b/src/svc_auth_gss.c index 48437c9b29..f8e293ffa4 100644 --- a/src/svc_auth_gss.c +++ b/src/svc_auth_gss.c @@ -280,8 +280,8 @@ svcauth_gss_accept_sec_context(struct svc_req *req, gss_release_buffer(&min_stat, &gd->checksum); maj_stat = - gss_sign(&min_stat, gd->ctx, GSS_C_QOP_DEFAULT, &seqbuf, - &checksum); + gss_get_mic(&min_stat, gd->ctx, GSS_C_QOP_DEFAULT, &seqbuf, + &checksum); if (maj_stat != GSS_S_COMPLETE) { gss_release_buffer(&min_stat, &gr->gr_token); @@ -291,12 +291,14 @@ svcauth_gss_accept_sec_context(struct svc_req *req, /* XXX ref? (assert gd->locked?) */ if (checksum.length > MAX_AUTH_BYTES){ gss_release_buffer(&min_stat, &gr->gr_token); + gss_release_buffer(&min_stat, &checksum); return (false); } req->rq_msg.RPCM_ack.ar_verf.oa_flavor = RPCSEC_GSS; req->rq_msg.RPCM_ack.ar_verf.oa_length = checksum.length; memcpy(req->rq_msg.RPCM_ack.ar_verf.oa_body, checksum.value, checksum.length); + gss_release_buffer(&min_stat, &checksum); } return (true); } @@ -371,25 +373,18 @@ svcauth_gss_nextverf(struct svc_req *req, struct svc_rpc_gss_data *gd, } if (checksum.length > MAX_AUTH_BYTES) { gss_log_status("checksum.length", maj_stat, min_stat); + gss_release_buffer(&min_stat, &checksum); return (false); } req->rq_msg.RPCM_ack.ar_verf.oa_flavor = RPCSEC_GSS; req->rq_msg.RPCM_ack.ar_verf.oa_length = checksum.length; memcpy(req->rq_msg.RPCM_ack.ar_verf.oa_body, checksum.value, checksum.length); + gss_release_buffer(&min_stat, &checksum); return (true); } -#define svcauth_gss_return(code) \ - do { \ - if (gc) \ - xdr_free((xdrproc_t) xdr_rpc_gss_cred, gc); \ - if (gd_locked) \ - mutex_unlock(&gd->lock); \ 
- return (code); \ - } while (0) - enum auth_stat _svcauth_gss(struct svc_req *req, bool *no_dispatch) { @@ -400,15 +395,15 @@ _svcauth_gss(struct svc_req *req, bool *no_dispatch) struct rpc_gss_init_res gr; int call_stat, offset; OM_uint32 min_stat; - bool gd_locked = false; - bool gd_hashed = false; + enum auth_stat rc = AUTH_OK; /* Initialize reply. */ req->rq_msg.RPCM_ack.ar_verf = _null_auth; /* Unserialize client credentials. */ - if (req->rq_msg.cb_cred.oa_length <= 0) - svcauth_gss_return(AUTH_BADCRED); + if (req->rq_msg.cb_cred.oa_length <= 0) { + return AUTH_BADCRED; + } gc = (struct rpc_gss_cred *)req->rq_msg.rq_cred_body; memset(gc, 0, sizeof(struct rpc_gss_cred)); @@ -418,25 +413,34 @@ _svcauth_gss(struct svc_req *req, bool *no_dispatch) if (!xdr_rpc_gss_cred(xdrs, gc)) { XDR_DESTROY(xdrs); - svcauth_gss_return(AUTH_BADCRED); + rc = AUTH_BADCRED; + goto cred_free; } XDR_DESTROY(xdrs); /* Check version. */ - if (gc->gc_v != RPCSEC_GSS_VERSION) - svcauth_gss_return(AUTH_BADCRED); + if (gc->gc_v != RPCSEC_GSS_VERSION) { + rc = AUTH_BADCRED; + goto cred_free; + } - if (gc->gc_seq > RPCSEC_GSS_MAXSEQ) - svcauth_gss_return(RPCSEC_GSS_CTXPROBLEM); + if (gc->gc_seq > RPCSEC_GSS_MAXSEQ) { + rc = RPCSEC_GSS_CTXPROBLEM; + goto cred_free; + } - if (gc->gc_proc > RPCSEC_GSS_MAXPROC) - svcauth_gss_return(AUTH_BADCRED); + if (gc->gc_proc > RPCSEC_GSS_MAXPROC) { + rc = AUTH_BADCRED; + goto cred_free; + } /* Check RPCSEC_GSS service. */ if (gc->gc_svc != RPCSEC_GSS_SVC_NONE && gc->gc_svc != RPCSEC_GSS_SVC_INTEGRITY - && gc->gc_svc != RPCSEC_GSS_SVC_PRIVACY) - svcauth_gss_return(AUTH_BADCRED); + && gc->gc_svc != RPCSEC_GSS_SVC_PRIVACY) { + rc = AUTH_BADCRED; + goto cred_free; + } /* Context lookup. */ if ((gc->gc_proc == RPCSEC_GSS_DATA) @@ -448,9 +452,10 @@ _svcauth_gss(struct svc_req *req, bool *no_dispatch) * N.B., we are explicitly allowed to discard contexts * for any reason (e.g., to save space). 
*/ gd = authgss_ctx_hash_get(gc); - if (!gd) - svcauth_gss_return(RPCSEC_GSS_CREDPROBLEM); - gd_hashed = true; + if (!gd) { + rc = RPCSEC_GSS_CREDPROBLEM; + goto cred_free; + } if (gc->gc_svc != gd->sec.svc) gd->sec.svc = gc->gc_svc; } @@ -466,7 +471,6 @@ _svcauth_gss(struct svc_req *req, bool *no_dispatch) /* Serialize context. */ mutex_lock(&gd->lock); - gd_locked = true; /* thread auth */ req->rq_auth = gd->auth; @@ -475,7 +479,8 @@ _svcauth_gss(struct svc_req *req, bool *no_dispatch) if (gd->established) { if (get_time_fast() >= gd->endtime) { *no_dispatch = true; - svcauth_gss_return(RPCSEC_GSS_CREDPROBLEM); + rc = RPCSEC_GSS_CREDPROBLEM; + goto gd_free; } /* XXX implied serialization? or just fudging? advance if @@ -488,7 +493,8 @@ _svcauth_gss(struct svc_req *req, bool *no_dispatch) offset = 0; } else if (offset >= gd->win || (gd->seqmask & (1 << offset))) { *no_dispatch = true; - svcauth_gss_return(AUTH_OK); + rc = RPCSEC_GSS_CREDPROBLEM; + goto gd_free; } gd->seqmask |= (1 << offset); /* XXX harmless */ @@ -504,21 +510,28 @@ _svcauth_gss(struct svc_req *req, bool *no_dispatch) case RPCSEC_GSS_INIT: case RPCSEC_GSS_CONTINUE_INIT: - if (req->rq_msg.cb_proc != NULLPROC) - svcauth_gss_return(AUTH_FAILED); /* XXX ? */ + if (req->rq_msg.cb_proc != NULLPROC) { + rc = AUTH_FAILED; /* XXX ? */ + goto gd_free; + } /* XXX why unconditionally acquire creds? 
*/ - if (!svcauth_gss_acquire_cred()) - svcauth_gss_return(AUTH_FAILED); + if (!svcauth_gss_acquire_cred()) { + rc = AUTH_FAILED; + goto gd_free; + } - if (!svcauth_gss_accept_sec_context(req, gd, &gr)) - svcauth_gss_return(AUTH_REJECTEDCRED); + if (!svcauth_gss_accept_sec_context(req, gd, &gr)) { + rc = AUTH_REJECTEDCRED; + goto gd_free; + } if (!svcauth_gss_nextverf(req, gd, htonl(gr.gr_win))) { /* XXX check */ gss_release_buffer(&min_stat, &gr.gr_token); mem_free(gr.gr_ctx.value, 0); - svcauth_gss_return(AUTH_FAILED); + rc = AUTH_FAILED; + goto gd_free; } *no_dispatch = true; @@ -533,45 +546,45 @@ _svcauth_gss(struct svc_req *req, bool *no_dispatch) gss_release_buffer(&min_stat, &gd->checksum); mem_free(gr.gr_ctx.value, 0); - if (call_stat >= XPRT_DIED) - svcauth_gss_return(AUTH_FAILED); + if (call_stat >= XPRT_DIED) { + rc = AUTH_FAILED; + goto gd_free; + } if (gr.gr_major == GSS_S_COMPLETE) { gd->established = true; - if (!gd_hashed) { - - /* krb5 pac -- try all that apply */ - gss_buffer_desc attr, display_buffer; - - /* completely generic */ - int auth = 1, comp = 0, more = -1; - - memset(&gd->pac.ms_pac, 0, - sizeof(gss_buffer_desc)); - memset(&display_buffer, 0, - sizeof(gss_buffer_desc)); - - /* MS AD */ - attr.value = "urn:mspac:"; - attr.length = 10; - - gr.gr_major = - gss_get_name_attribute(&gr.gr_minor, - gd->client_name, - &attr, &auth, &comp, - &gd->pac.ms_pac, - &display_buffer, - &more); - - if (gr.gr_major == GSS_S_COMPLETE) { - /* dont need it */ - gss_release_buffer(&gr.gr_minor, - &display_buffer); - gd->flags |= SVC_RPC_GSS_FLAG_MSPAC; - } - - (void)authgss_ctx_hash_set(gd); + + /* krb5 pac -- try all that apply */ + gss_buffer_desc attr, display_buffer; + + /* completely generic */ + int auth = 1, comp = 0, more = -1; + + memset(&gd->pac.ms_pac, 0, + sizeof(gss_buffer_desc)); + memset(&display_buffer, 0, + sizeof(gss_buffer_desc)); + + /* MS AD */ + attr.value = "urn:mspac:"; + attr.length = 10; + + gr.gr_major = + 
gss_get_name_attribute(&gr.gr_minor, + gd->client_name, + &attr, &auth, &comp, + &gd->pac.ms_pac, + &display_buffer, + &more); + + if (gr.gr_major == GSS_S_COMPLETE) { + /* dont need it */ + gss_release_buffer(&gr.gr_minor, + &display_buffer); + gd->flags |= SVC_RPC_GSS_FLAG_MSPAC; } + + (void)authgss_ctx_hash_set(gd); } break; @@ -582,30 +595,38 @@ _svcauth_gss(struct svc_req *req, bool *no_dispatch) call_stat = svcauth_gss_validate(req, gd); switch (call_stat) { default: - svcauth_gss_return(RPCSEC_GSS_CREDPROBLEM); + rc = RPCSEC_GSS_CREDPROBLEM; + goto gd_free; case 0: break; } - if (!svcauth_gss_nextverf(req, gd, htonl(gc->gc_seq))) - svcauth_gss_return(AUTH_FAILED); + if (!svcauth_gss_nextverf(req, gd, htonl(gc->gc_seq))) { + rc = AUTH_FAILED; + goto gd_free; + } break; case RPCSEC_GSS_DESTROY: - if (req->rq_msg.cb_proc != NULLPROC) - svcauth_gss_return(AUTH_FAILED); /* XXX ? */ + if (req->rq_msg.cb_proc != NULLPROC) { + rc = AUTH_FAILED; /* XXX ? */ + goto gd_free; + } - if (svcauth_gss_validate(req, gd)) - svcauth_gss_return(RPCSEC_GSS_CREDPROBLEM); + if (svcauth_gss_validate(req, gd)) { + rc = RPCSEC_GSS_CREDPROBLEM; + goto gd_free; + } - if (!svcauth_gss_nextverf(req, gd, htonl(gc->gc_seq))) - svcauth_gss_return(AUTH_FAILED); + if (!svcauth_gss_nextverf(req, gd, htonl(gc->gc_seq))) { + rc = AUTH_FAILED; + goto gd_free; + } *no_dispatch = true; /* avoid lock order reversal gd->lock, xprt->xp_lock */ mutex_unlock(&gd->lock); - gd_locked = false; /* This takes gd->lock, so call it after we unlock */ (void)authgss_ctx_hash_del(gd); @@ -624,15 +645,27 @@ _svcauth_gss(struct svc_req *req, bool *no_dispatch) */ unref_svc_rpc_gss_data(gd); req->rq_auth = &svc_auth_none; + goto cred_free; break; default: - svcauth_gss_return(AUTH_REJECTEDCRED); + rc = AUTH_REJECTEDCRED; break; } +gd_free: + mutex_unlock(&gd->lock); + + if (rc != AUTH_OK) { + /* On success, the ref gets returned to the caller */ + unref_svc_rpc_gss_data(gd); + req->rq_auth = NULL; + } + +cred_free: 
+ xdr_free((xdrproc_t) xdr_rpc_gss_cred, gc); - svcauth_gss_return(AUTH_OK); + return rc; } static bool @@ -684,6 +717,15 @@ svcauth_gss_wrap(struct svc_req *req, XDR *xdrs) req->rq_msg.rq_cred_body; bool result; + + __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, "%s() %d %s", __func__, + !gd->established ? 0 : gc->gc_svc, + !gd->established ? "not established" + : gc->gc_svc == RPCSEC_GSS_SVC_NONE ? "krb5" + : gc->gc_svc == RPCSEC_GSS_SVC_INTEGRITY ? "krb5i" + : gc->gc_svc == RPCSEC_GSS_SVC_PRIVACY ? "krb5p" + : "unknown"); + if (!gd->established || gc->gc_svc == RPCSEC_GSS_SVC_NONE) return (svc_auth_none.svc_ah_ops->svc_ah_wrap(req, xdrs)); @@ -708,98 +750,17 @@ svcauth_gss_unwrap(struct svc_req *req) mutex_lock(&gd->lock); result = xdr_rpc_gss_unwrap(req->rq_xdrs, req->rq_msg.rm_xdr.proc, req->rq_msg.rm_xdr.where, gd->ctx, - gd->sec.qop, gd->sec.svc, gc_seq); + gd->sec.qop, gd->sec.svc, gc_seq, + NULL, NULL); mutex_unlock(&gd->lock); return (result); } -static inline bool -xdr_rpc_gss_checksum(struct svc_req *req, gss_ctx_id_t ctx, gss_qop_t qop, - rpc_gss_svc_t svc, u_int seq) +void svcauth_gss_svc_checksum(void *priv, void *databuf, size_t length) { - XDR *xdrs = req->rq_xdrs; - XDR tmpxdrs; - gss_buffer_desc databuf, wrapbuf; - OM_uint32 maj_stat, min_stat; - u_int qop_state; - int conf_state; - uint32_t seq_num; - bool xdr_stat; - - if (req->rq_msg.rm_xdr.proc == (xdrproc_t) xdr_void - || req->rq_msg.rm_xdr.where == NULL) - return (TRUE); - - memset(&databuf, 0, sizeof(databuf)); - memset(&wrapbuf, 0, sizeof(wrapbuf)); - - if (svc == RPCSEC_GSS_SVC_INTEGRITY) { - /* Decode databody_integ. */ - if (!xdr_rpc_gss_decode(xdrs, &databuf)) { - __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, - "%s() xdr_rpc_gss_decode databody_integ failed", - __func__); - return (FALSE); - } - /* Decode checksum. 
*/ - if (!xdr_rpc_gss_decode(xdrs, &wrapbuf)) { - gss_release_buffer(&min_stat, &databuf); - __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, - "%s() xdr_rpc_gss_decode checksum failed", - __func__); - return (FALSE); - } - /* Verify checksum and QOP. */ - maj_stat = - gss_verify_mic(&min_stat, ctx, &databuf, &wrapbuf, - &qop_state); - gss_release_buffer(&min_stat, &wrapbuf); - - if (maj_stat != GSS_S_COMPLETE || qop_state != qop) { - gss_release_buffer(&min_stat, &databuf); - gss_log_status("gss_verify_mic", maj_stat, min_stat); - return (FALSE); - } - } else if (svc == RPCSEC_GSS_SVC_PRIVACY) { - /* Decode databody_priv. */ - if (!xdr_rpc_gss_decode(xdrs, &wrapbuf)) { - __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, - "%s() xdr_rpc_gss_decode databody_priv failed", - __func__); - return (FALSE); - } - /* Decrypt databody. */ - maj_stat = - gss_unwrap(&min_stat, ctx, &wrapbuf, &databuf, &conf_state, - &qop_state); - - gss_release_buffer(&min_stat, &wrapbuf); + struct svc_req *req = priv; - /* Verify encryption and QOP. */ - if (maj_stat != GSS_S_COMPLETE || qop_state != qop - || conf_state != TRUE) { - gss_release_buffer(&min_stat, &databuf); - gss_log_status("gss_unwrap", maj_stat, min_stat); - return (FALSE); - } - } - /* Decode rpc_gss_data_t (sequence number + arguments). */ - xdrmem_create(&tmpxdrs, databuf.value, databuf.length, XDR_DECODE); - SVC_CHECKSUM(req, databuf.value, databuf.length); - xdr_stat = (XDR_GETUINT32(&tmpxdrs, &seq_num) - && (*req->rq_msg.rm_xdr.proc) - (&tmpxdrs, req->rq_msg.rm_xdr.where)); - XDR_DESTROY(&tmpxdrs); - gss_release_buffer(&min_stat, &databuf); - - /* Verify sequence number. 
*/ - if (xdr_stat == TRUE && seq_num != seq) { - __warnx(TIRPC_DEBUG_FLAG_RPCSEC_GSS, - "%s() wrong sequence number in databody", - __func__); - return (FALSE); - } - return (xdr_stat); + SVC_CHECKSUM(req, databuf, length); } static bool @@ -814,8 +775,10 @@ svcauth_gss_checksum(struct svc_req *req) } mutex_lock(&gd->lock); - result = xdr_rpc_gss_checksum(req, gd->ctx, gd->sec.qop, gd->sec.svc, - gc_seq); + result = xdr_rpc_gss_unwrap(req->rq_xdrs, req->rq_msg.rm_xdr.proc, + req->rq_msg.rm_xdr.where, gd->ctx, + gd->sec.qop, gd->sec.svc, gc_seq, + svcauth_gss_svc_checksum, req); mutex_unlock(&gd->lock); return (result); } diff --git a/src/svc_dg.c b/src/svc_dg.c index 43de3d6d8d..156d5ff5f7 100644 --- a/src/svc_dg.c +++ b/src/svc_dg.c @@ -94,6 +94,8 @@ svc_dg_xprt_zalloc(size_t iosz) /* Init SVCXPRT locks, etc */ rpc_dplx_rec_init(&su->su_dr); + /* Extra ref to match TCP */ + SVC_REF(&su->su_dr.xprt, SVC_REF_FLAG_NONE); xdr_ioq_setup(&su->su_dr.ioq); return (su); } @@ -269,7 +271,7 @@ svc_dg_rendezvous(SVCXPRT *xprt) return (XPRT_DIED); } - if (unlikely(svc_rqst_rearm_events(xprt))) { + if (unlikely(svc_rqst_rearm_events(xprt, SVC_XPRT_FLAG_ADDED_RECV))) { __warnx(TIRPC_DEBUG_FLAG_ERROR, "%s: %p fd %d svc_rqst_rearm_events failed (will set dead)", __func__, xprt, xprt->xp_fd); @@ -310,7 +312,7 @@ svc_dg_recv(SVCXPRT *xprt) /* pass the xdrs to user to store in struct svc_req, as most of * the work has already been done on rendezvous */ - stat = __svc_params->request_cb(xprt, REC_XPRT(xprt)->ioq.xdrs); + stat = svc_request(xprt, REC_XPRT(xprt)->ioq.xdrs); if (xprt->xp_flags & SVC_XPRT_FLAG_DESTROYED) return (XPRT_DESTROYED); @@ -318,6 +320,7 @@ svc_dg_recv(SVCXPRT *xprt) /* Only after checking SVC_XPRT_FLAG_DESTROYED: * because SVC_DESTROY() has decremented already. 
*/ + SVC_DESTROY(xprt); SVC_RELEASE(xprt, SVC_RELEASE_FLAG_NONE); return (stat); } diff --git a/src/svc_internal.h b/src/svc_internal.h index 56cb60af32..c51e437fc8 100644 --- a/src/svc_internal.h +++ b/src/svc_internal.h @@ -56,7 +56,8 @@ struct svc_params { } ev_u; svc_xprt_fun_t disconnect_cb; - svc_xprt_xdr_fun_t request_cb; + svc_xprt_alloc_fun_t alloc_cb; + svc_xprt_free_fun_t free_cb; struct { int ctx_hash_partitions; @@ -76,6 +77,8 @@ struct svc_params { int32_t idle_timeout; }; +enum xprt_stat svc_request(SVCXPRT *xprt, XDR *xdrs); + extern struct svc_params __svc_params[1]; /* @@ -149,8 +152,25 @@ svc_override_ops(struct xp_ops *ops, SVCXPRT *rendezvous) } /* in svc_rqst.c */ -int svc_rqst_rearm_events(SVCXPRT *); +int svc_rqst_rearm_events_locked(SVCXPRT *, uint16_t); + +static inline int svc_rqst_rearm_events(SVCXPRT *xprt, uint16_t ev_flags) +{ + struct rpc_dplx_rec *rec = REC_XPRT(xprt); + int code; + + rpc_dplx_rli(rec); + + code = svc_rqst_rearm_events_locked(xprt, ev_flags); + + rpc_dplx_rui(rec); + + return code; +} + int svc_rqst_xprt_register(SVCXPRT *, SVCXPRT *); void svc_rqst_xprt_unregister(SVCXPRT *, uint32_t); +int svc_rqst_evchan_write(SVCXPRT *, struct xdr_ioq *, bool); +void svc_rqst_xprt_send_complete(SVCXPRT *); #endif /* TIRPC_SVC_INTERNAL_H */ diff --git a/src/svc_ioq.c b/src/svc_ioq.c index e99b55ee7d..46b944b257 100644 --- a/src/svc_ioq.c +++ b/src/svc_ioq.c @@ -66,217 +66,377 @@ #include #include "svc_ioq.h" -/* Send queues, configurable using RPC_Ioq_ThrdMax - * - * Ideally, these would be some variant of weighted fair queuing. Currently, - * assuming supplied by underlying OS. - * - * The assigned thread should have affinity for the interface. Therefore, the - * first thread arriving for each interface is used for all subsequent work, - * until the interface is idle. This assumes that the output interface is - * closely associated with the input interface. - * - * Note that this is a fixed size list of interfaces. 
In most cases, - * many of these entries will be unused. - * - * For efficiency, a mask is applied to the ifindex, possibly causing overlap of - * multiple interfaces. The size is selected to be larger than expected number - * of concurrently active interfaces. Size must be a power of 2 for mask. - */ -static int num_send_queues; /* must be a power of 2 */ -static struct poolq_head *ioq_ifqh; - -static inline int -svc_ioq_mask(int fd) -{ - return fd & (num_send_queues - 1); /* num_send_queues is a power of 2 */ -} - -void -svc_ioq_init(void) -{ - struct poolq_head *ifph; - int i; - - /* We would like to make the number of send queues close to half - * of the thrd_max. Also, the number of send queues must be a - * power 2 for quick bitmask hashig! - */ - num_send_queues = 1; - while (num_send_queues * 2 < __svc_params->ioq.thrd_max / 2) - num_send_queues <<= 1; - - ioq_ifqh = mem_calloc(num_send_queues, sizeof(struct poolq_head)); - for (i = 0, ifph = &ioq_ifqh[0]; i < num_send_queues; ifph++, i++) { - ifph->qcount = 0; - TAILQ_INIT(&ifph->qh); - mutex_init(&ifph->qmutex, NULL); - } -} - #define LAST_FRAG ((u_int32_t)(1 << 31)) +#define LAST_FRAG_XDR_UNITS ((LAST_FRAG - 1) & ~(BYTES_PER_XDR_UNIT - 1)) #define MAXALLOCA (256) -static inline void +/* Returns 0 on success, EWOULDBLOCK if would block, <0 on error */ +static inline int svc_ioq_flushv(SVCXPRT *xprt, struct xdr_ioq *xioq) { - struct iovec *iov, *tiov, *wiov; - struct poolq_entry *have; - struct xdr_ioq_uv *data; + struct msghdr msg; + struct iovec *iov; + struct xdr_vio *vio; ssize_t result; u_int32_t frag_header; u_int32_t fbytes; - u_int32_t remaining = 0; - u_int32_t vsize = (xioq->ioq_uv.uvqh.qcount + 1) * sizeof(struct iovec); - int iw = 0; - int ix = 1; + int error = 0; + int frag_needed = 0; + u_int32_t last_frag = 0; + u_int32_t end, remaining, iov_count, vsize, isize; + + /* update the most recent data length, just in case */ + xdr_tail_update(xioq->xdrs); + + /* Some basic computations */ + end = 
XDR_GETPOS(xioq->xdrs); + remaining = end - xioq->write_start; + iov_count = XDR_IOVCOUNT(xioq->xdrs, xioq->write_start, remaining); + vsize = (iov_count + 1) * sizeof(struct iovec); + isize = iov_count * sizeof(struct xdr_vio); + + __warnx(TIRPC_DEBUG_FLAG_SVC_VC, + "-------> %s: remaining %"PRIu32" write_start %"PRIu32 + " end %"PRIu32, + __func__, remaining, xioq->write_start, end); + + memset(&msg, 0, sizeof(msg)); + + if (end > (2 * LAST_FRAG_XDR_UNITS)) { + /* This data will need to be 3 fragments */ + if (xioq->write_start < LAST_FRAG_XDR_UNITS) { + fbytes = LAST_FRAG_XDR_UNITS - xioq->write_start; + } else if (xioq->write_start < (2 * LAST_FRAG_XDR_UNITS)) { + fbytes = (2 * LAST_FRAG_XDR_UNITS) - xioq->write_start; + } else { + fbytes = end - xioq->write_start; + last_frag = LAST_FRAG; + } + } else if (end > LAST_FRAG_XDR_UNITS) { + /* This data will need to be 2 fragments */ + if (xioq->write_start < LAST_FRAG_XDR_UNITS) { + fbytes = LAST_FRAG_XDR_UNITS - xioq->write_start; + } else { + fbytes = end - xioq->write_start; + last_frag = LAST_FRAG; + } + } else { + fbytes = remaining; + last_frag = LAST_FRAG; + } if (unlikely(vsize > MAXALLOCA)) { iov = mem_alloc(vsize); } else { iov = alloca(vsize); } - wiov = iov; /* position at initial fragment header */ - - /* update the most recent data length, just in case */ - xdr_tail_update(xioq->xdrs); - /* build list after initial fragment header (ix = 1 above) */ - TAILQ_FOREACH(have, &(xioq->ioq_uv.uvqh.qh), q) { - data = IOQ_(have); - tiov = iov + ix; - tiov->iov_base = data->v.vio_head; - tiov->iov_len = ioquv_length(data); - remaining += tiov->iov_len; - ix++; + if (unlikely(isize > MAXALLOCA)) { + vio = mem_alloc(isize); + } else { + vio = alloca(isize); } while (remaining > 0) { - if (iw == 0) { - /* new fragment header, determine last iov */ - fbytes = 0; - for (tiov = &wiov[++iw]; - (tiov < &iov[ix]) && (iw < __svc_maxiov); - ++tiov, ++iw) { - fbytes += tiov->iov_len; - - /* check for fragment value 
overflow */ - /* never happens, see ganesha FSAL_MAXIOSIZE */ - if (unlikely(fbytes >= LAST_FRAG)) { - fbytes -= tiov->iov_len; - break; - } - } /* for */ - - /* fragment length doesn't include fragment header */ - if (&wiov[iw] < &iov[ix]) { - frag_header = htonl((u_int32_t) (fbytes)); - } else { - frag_header = htonl((u_int32_t) (fbytes | LAST_FRAG)); - } - wiov->iov_base = &(frag_header); - wiov->iov_len = sizeof(u_int32_t); + int i; + int frag_hdr_size = 0; + + /* Note that there may be lots of re-walking the ioq to + * count the number of buffers or fill the buffers in the vio, + * unfortunately, any mechanism to try and avoid that would + * still have to re-walk the ioq, so we don't save THAT much + * by just recomputing in preparation for each attempt to send + * data. We could shortcut a little bit if we could estimate + * how many bytes would fit in a single iovec so that we + * don't walk more of the ioq than we need to. But that adds a + * lot of complexity, and just saves walking a linked list. + * + * A more relevant improvement here might actually be to use + * larger buffers than 8k. Optionally, when we do more to + * implement zero copy, the largest responses which are + * READ and READDIR will be adding a single buffer, or a small + * number of buffers to the ioq instead of copying into the + * 8k byte buffers. + */ + iov_count = XDR_IOVCOUNT(xioq->xdrs, xioq->write_start, fbytes); + + if (xioq->write_start == 0 || + xioq->write_start == LAST_FRAG_XDR_UNITS || + xioq->write_start == (2 * LAST_FRAG_XDR_UNITS)) { + /* We need a fragment header, or to complete it. Look + * at xioq->frag_hdr_bytes_sent to know how many bytes + * of it we have sent so far. 
+ */ + frag_needed = 1; + frag_header = htonl((u_int32_t) (fbytes | last_frag)); + iov[0].iov_base = ((char *) &frag_header) + + xioq->frag_hdr_bytes_sent; + iov[0].iov_len = sizeof(frag_header) - + xioq->frag_hdr_bytes_sent; + frag_hdr_size = iov[0].iov_len; + __warnx(TIRPC_DEBUG_FLAG_SVC_VC, + "%s: %p fd %d iov[0].vio_head %p vio_length %zu", + __func__, xprt, xprt->xp_fd, + iov[0].iov_base, iov[0].iov_len); + } - /* writev return includes fragment header */ - remaining += sizeof(u_int32_t); - fbytes += sizeof(u_int32_t); + __warnx(TIRPC_DEBUG_FLAG_SVC_VC, + "%s: %p fd %d msg_iov %p remaining %"PRIu32 + " fbytes %"PRIu32" iov_count %"PRIu32 + " write_start %"PRIu32" end %"PRIu32 + " frag_needed %d frag_hdr_size %d", + __func__, xprt, xprt->xp_fd, msg.msg_iov, + remaining, fbytes, iov_count, + xioq->write_start, end, frag_needed, frag_hdr_size); + + /* Get an xdr_vio corresponding to the bytes of this fragment */ + if (!XDR_FILLBUFS(xioq->xdrs, xioq->write_start, vio, fbytes)) { + __warnx(TIRPC_DEBUG_FLAG_ERROR, + "%s() XDR_FILLBUFS failed", __func__); + error = -1; + break; } - /* blocking write */ - result = writev(xprt->xp_fd, wiov, iw); - remaining -= result; + if (iov_count + frag_needed > UIO_MAXIOV) { + /* sendmsg can only take UIO_MAXIOV iovecs */ + iov_count = UIO_MAXIOV - frag_needed; + } - if (result == fbytes) { - wiov += iw - 1; - iw = 0; - continue; + /* Convert the xdr_vio to an iovec */ + for (i = 0; i < iov_count; i++) { + iov[i + frag_needed].iov_base = vio[i].vio_head; + iov[i + frag_needed].iov_len = vio[i].vio_length; + __warnx(TIRPC_DEBUG_FLAG_SVC_VC, + "%s: %p fd %d iov[%d].vio_head %p vio_length %zu", + __func__, xprt, xprt->xp_fd, i + frag_needed, + iov[i + frag_needed].iov_base, + iov[i + frag_needed].iov_len); } + + msg.msg_iov = iov; + msg.msg_iovlen = iov_count + frag_needed; + +again: + +#ifdef USE_LTTNG_NTIRPC + tracepoint(xprt, sendmsg, __func__, __LINE__, + xprt, + (unsigned int) remaining, + (unsigned int) frag_needed, + (unsigned
int) iov_count); +#endif /* USE_LTTNG_NTIRPC */ + + /* non-blocking write */ + errno = 0; + result = sendmsg(xprt->xp_fd, &msg, MSG_DONTWAIT); + error = errno; + + __warnx((error == EWOULDBLOCK || error == EAGAIN || error == 0) + ? TIRPC_DEBUG_FLAG_SVC_VC + : TIRPC_DEBUG_FLAG_ERROR, + "%s: %p fd %d msg_iov %p sendmsg remaining %" + PRIu32" result %ld error %s (%d)", + __func__, xprt, xprt->xp_fd, msg.msg_iov, + remaining, (long int) result, + strerror(error), error); + if (unlikely(result < 0)) { - __warnx(TIRPC_DEBUG_FLAG_ERROR, - "%s() writev failed (%d)\n", - __func__, errno); - SVC_DESTROY(xprt); + if (error == EWOULDBLOCK || error == EAGAIN) { + /* Socket buffer full; don't destroy */ + error = EWOULDBLOCK; + xioq->has_blocked = true; + } else { + error = result; + } break; } + + if (result < frag_hdr_size) { + /* Only part of the fragment header was sent; shrink the + * outstanding header size so the retry below compares + * against (and later deducts) only the unsent bytes. + */ + xioq->frag_hdr_bytes_sent += result; + frag_hdr_size -= result; + iov[0].iov_base += result; + iov[0].iov_len -= result; + __warnx(TIRPC_DEBUG_FLAG_SVC_VC, + "%s: %p fd %d iov[0].vio_head %p vio_length %zu", + __func__, xprt, xprt->xp_fd, + iov[0].iov_base, iov[0].iov_len); + /* Retry directly: the data iovecs are unchanged, so + * there is no need to recompute them. */ + goto again; + } + + /* At this point, the frag header must have been fully sent, + * go ahead and indicate that... Also deduct any fragment + * header bytes from result. + */ + xioq->frag_hdr_bytes_sent = sizeof(frag_header); + result -= frag_hdr_size; + frag_hdr_size = 0; + + /* Keep track of progress */ + remaining -= result; fbytes -= result; - /* rare? writev underrun? (assume never overrun) */ - for (tiov = wiov; iw > 0; ++tiov, --iw) { - if (tiov->iov_len > result) { - tiov->iov_len -= result; - tiov->iov_base += result; - wiov = tiov; - break; + /* Keep track of progress in the xioq */ + xioq->write_start += result; + + if (fbytes == 0) { + /* We completed sending a fragment.
*/ + xioq->frag_hdr_bytes_sent = 0; + if (remaining > LAST_FRAG_XDR_UNITS) { + fbytes = LAST_FRAG_XDR_UNITS; } else { - result -= tiov->iov_len; + fbytes = remaining; } - } /* for */ + frag_needed = 1; + } else { + frag_needed = 0; + } } /* while */ - if (unlikely(vsize > MAXALLOCA)) { + if (unlikely(vsize > MAXALLOCA)) mem_free(iov, vsize); - } + + if (unlikely(isize > MAXALLOCA)) + mem_free(vio, isize); + + __warnx(TIRPC_DEBUG_FLAG_SVC_VC, + "%s: %p fd %d returning %s (%d)", + __func__, xprt, xprt->xp_fd, strerror(error), error); + + return error; } -static void -svc_ioq_write(SVCXPRT *xprt, struct xdr_ioq *xioq, struct poolq_head *ifph) +void svc_ioq_write(SVCXPRT *xprt) { + struct rpc_dplx_rec *rec = REC_XPRT(xprt); + struct xdr_ioq *xioq; struct poolq_entry *have; - for (;;) { +#ifdef USE_LTTNG_NTIRPC + tracepoint(xprt, mutex, __func__, __LINE__, xprt); +#endif /* USE_LTTNG_NTIRPC */ + mutex_lock(&rec->writeq.qmutex); + /* Process the xioq from the head of the xprt queue */ + have = TAILQ_FIRST(&rec->writeq.qh); + mutex_unlock(&rec->writeq.qmutex); + + while (have != NULL) { + int rc = 0; + + xioq = _IOQ(have); + + /* Save has blocked before state */ + bool has_blocked = xioq->has_blocked; + /* do i/o unlocked */ if (svc_work_pool.params.thrd_max && !(xprt->xp_flags & SVC_XPRT_FLAG_DESTROYED)) { /* all systems are go! 
*/ - svc_ioq_flushv(xprt, xioq); + rc = svc_ioq_flushv(xprt, xioq); } - SVC_RELEASE(xprt, SVC_RELEASE_FLAG_NONE); - XDR_DESTROY(xioq->xdrs); - mutex_lock(&ifph->qmutex); - if (--(ifph->qcount) == 0) +#ifdef USE_LTTNG_NTIRPC + tracepoint(xprt, mutex, __func__, __LINE__, &rec->xprt); +#endif /* USE_LTTNG_NTIRPC */ + mutex_lock(&rec->writeq.qmutex); + if (rc < 0) { + /* Dequeue the failed request */ + TAILQ_REMOVE(&rec->writeq.qh, have, q); + mutex_unlock(&rec->writeq.qmutex); + + /* IO failed, destroy rather than releasing */ + __warnx(TIRPC_DEBUG_FLAG_SVC_VC, + "%s: %p fd %d About to destroy - rc = %d", + __func__, xprt, xprt->xp_fd, rc); + SVC_DESTROY(xprt); + break; + } else if (rc == EWOULDBLOCK){ + __warnx(TIRPC_DEBUG_FLAG_SVC_VC, + "%s: %p fd %d EWOULDBLOCK", + __func__, xprt, xprt->xp_fd); + /* Add to epoll and stop processing this xprt's queue */ +#ifdef USE_LTTNG_NTIRPC + tracepoint(xprt, write_blocked, __func__, __LINE__, + &rec->xprt); +#endif /* USE_LTTNG_NTIRPC */ + svc_rqst_evchan_write(xprt, xioq, has_blocked); + mutex_unlock(&rec->writeq.qmutex); break; + } else { + if (xioq->has_blocked) { + __warnx(TIRPC_DEBUG_FLAG_SVC_VC, + "%s: %p fd %d COMPLETED AFTER BLOCKING", + __func__, xprt, xprt->xp_fd); +#ifdef USE_LTTNG_NTIRPC + tracepoint(xprt, write_complete, __func__, __LINE__, + &rec->xprt, (int) xioq->has_blocked); +#endif /* USE_LTTNG_NTIRPC */ + svc_rqst_xprt_send_complete(xprt); + } else { + __warnx(TIRPC_DEBUG_FLAG_SVC_VC, + "%s: %p fd %d COMPLETED", + __func__, xprt, xprt->xp_fd); +#ifdef USE_LTTNG_NTIRPC + tracepoint(xprt, write_complete, __func__, __LINE__, + &rec->xprt, (int) xioq->has_blocked); +#endif /* USE_LTTNG_NTIRPC */ + } - have = TAILQ_FIRST(&ifph->qh); - TAILQ_REMOVE(&ifph->qh, have, q); - mutex_unlock(&ifph->qmutex); + /* Dequeue the completed request */ + TAILQ_REMOVE(&rec->writeq.qh, have, q); - xioq = _IOQ(have); - xprt = (SVCXPRT *)xioq->xdrs[0].x_lib[1]; + /* Fetch the next request */ + have = TAILQ_FIRST(&rec->writeq.qh); + 
mutex_unlock(&rec->writeq.qmutex); + + __warnx(TIRPC_DEBUG_FLAG_SVC_VC, + "%s: %p fd %d About to release", + __func__, xprt, xprt->xp_fd); + SVC_RELEASE(xprt, SVC_RELEASE_FLAG_NONE); + XDR_DESTROY(xioq->xdrs); + } } - mutex_unlock(&ifph->qmutex); } static void svc_ioq_write_callback(struct work_pool_entry *wpe) { struct xdr_ioq *xioq = opr_containerof(wpe, struct xdr_ioq, ioq_wpe); - SVCXPRT *xprt = (SVCXPRT *)xioq->xdrs[0].x_lib[1]; - struct poolq_head *ifph = &ioq_ifqh[svc_ioq_mask(xprt->xp_fd)]; - svc_ioq_write(xprt, xioq, ifph); + svc_ioq_write(xioq->xdrs[0].x_lib[1]); } void svc_ioq_write_now(SVCXPRT *xprt, struct xdr_ioq *xioq) { - struct poolq_head *ifph = &ioq_ifqh[svc_ioq_mask(xprt->xp_fd)]; + struct rpc_dplx_rec *rec = REC_XPRT(xprt); + bool was_empty; SVC_REF(xprt, SVC_REF_FLAG_NONE); - mutex_lock(&ifph->qmutex); - if ((ifph->qcount)++ > 0) { - /* queue additional output requests without task switch */ - TAILQ_INSERT_TAIL(&ifph->qh, &(xioq->ioq_s), q); - mutex_unlock(&ifph->qmutex); - return; - } - mutex_unlock(&ifph->qmutex); +#ifdef USE_LTTNG_NTIRPC + tracepoint(xprt, mutex, __func__, __LINE__, &rec->xprt); +#endif /* USE_LTTNG_NTIRPC */ + mutex_lock(&rec->writeq.qmutex); + + was_empty = TAILQ_FIRST(&rec->writeq.qh) == NULL; - /* handle this output request without queuing, then any additional - * output requests without a task switch (using this thread). - */ - svc_ioq_write(xprt, xioq, ifph); + /* always queue output requests on the duplex record's writeq */ + TAILQ_INSERT_TAIL(&rec->writeq.qh, &(xioq->ioq_s), q); + + mutex_unlock(&rec->writeq.qmutex); + + if (was_empty) { + /* handle this output request without queuing, then any + * additional output requests without a task switch (using this + * thread). 
+ */ + svc_ioq_write(xprt); + } } /* @@ -289,21 +449,26 @@ svc_ioq_write_now(SVCXPRT *xprt, struct xdr_ioq *xioq) void svc_ioq_write_submit(SVCXPRT *xprt, struct xdr_ioq *xioq) { - struct poolq_head *ifph = &ioq_ifqh[svc_ioq_mask(xprt->xp_fd)]; + struct rpc_dplx_rec *rec = REC_XPRT(xprt); + bool was_empty; SVC_REF(xprt, SVC_REF_FLAG_NONE); - mutex_lock(&ifph->qmutex); - if ((ifph->qcount)++ > 0) { - /* queue additional output requests, they will be handled by - * existing thread without another task switch. - */ - TAILQ_INSERT_TAIL(&ifph->qh, &(xioq->ioq_s), q); - mutex_unlock(&ifph->qmutex); - return; - } - mutex_unlock(&ifph->qmutex); +#ifdef USE_LTTNG_NTIRPC + tracepoint(xprt, mutex, __func__, __LINE__, &rec->xprt); +#endif /* USE_LTTNG_NTIRPC */ + mutex_lock(&rec->writeq.qmutex); + + was_empty = TAILQ_FIRST(&rec->writeq.qh) == NULL; + + /* always queue output requests on the duplex record's writeq */ + TAILQ_INSERT_TAIL(&rec->writeq.qh, &(xioq->ioq_s), q); - xioq->ioq_wpe.fun = svc_ioq_write_callback; - work_pool_submit(&svc_work_pool, &xioq->ioq_wpe); + mutex_unlock(&rec->writeq.qmutex); + + if (was_empty) { + /* Schedule work to process output for this duplex record.
*/ + xioq->ioq_wpe.fun = svc_ioq_write_callback; + work_pool_submit(&svc_work_pool, &xioq->ioq_wpe); + } } diff --git a/src/svc_ioq.h b/src/svc_ioq.h index d36d4c5e25..2587d6c3bb 100644 --- a/src/svc_ioq.h +++ b/src/svc_ioq.h @@ -29,7 +29,7 @@ #include #include -void svc_ioq_init(void); +void svc_ioq_write(SVCXPRT *); void svc_ioq_write_now(SVCXPRT *, struct xdr_ioq *); void svc_ioq_write_submit(SVCXPRT *, struct xdr_ioq *); diff --git a/src/svc_raw.c b/src/svc_raw.c index 420013e4f0..8519ba5e99 100644 --- a/src/svc_raw.c +++ b/src/svc_raw.c @@ -141,7 +141,7 @@ svc_raw_recv(SVCXPRT *xprt) } mutex_unlock(&svcraw_lock); - return (__svc_params->request_cb(xprt, srp->raw_dr.ioq.xdrs)); + return svc_request(xprt, srp->raw_dr.ioq.xdrs); } static enum xprt_stat diff --git a/src/svc_rqst.c b/src/svc_rqst.c index ae12d7c459..a90fcf8581 100644 --- a/src/svc_rqst.c +++ b/src/svc_rqst.c @@ -49,6 +49,8 @@ #include "clnt_internal.h" #include "svc_internal.h" #include "svc_xprt.h" +#include +#include "svc_ioq.h" /** * @file svc_rqst.c @@ -93,6 +95,7 @@ struct svc_rqst_rec { struct epoll_event ctrl_ev; struct epoll_event *events; u_int max_events; /* max epoll events */ + bool sv1_added; } epoll; #endif struct { @@ -102,8 +105,69 @@ struct svc_rqst_rec { int32_t ev_refcnt; uint16_t ev_flags; + struct xdr_ioq *xioq; /* IOQ for floating sr_rec */ }; +void svc_rqst_rec_init(struct svc_rqst_rec *sr_rec) +{ + /* Pre-initialize stuff that needs to be non-zero */ + mutex_init(&sr_rec->ev_lock, NULL); + sr_rec->sv[0] = -1; + sr_rec->sv[1] = -1; + sr_rec->id_k = UINT32_MAX; +#if defined(TIRPC_EPOLL) + sr_rec->ev_u.epoll.epoll_fd = -1; +#endif +} + +void svc_rqst_rec_destroy(struct svc_rqst_rec *sr_rec) +{ +#if defined(TIRPC_EPOLL) + if (sr_rec->ev_u.epoll.sv1_added) { + int code; + + code = epoll_ctl(sr_rec->ev_u.epoll.epoll_fd, EPOLL_CTL_DEL, + sr_rec->sv[1], &sr_rec->ev_u.epoll.ctrl_ev); + if (code) { + code = errno; + __warnx(TIRPC_DEBUG_FLAG_WARN, + "%s: sr_rec %p evchan %d ev_refcnt 
%" PRId32 + " epoll_fd %d control fd pair (%d:%d) unhook failed (%d)", + __func__, + sr_rec, sr_rec->id_k, sr_rec->ev_refcnt, + sr_rec->ev_u.epoll.epoll_fd, + sr_rec->sv[0], sr_rec->sv[1], code); + } else { + __warnx(TIRPC_DEBUG_FLAG_SVC_RQST | + TIRPC_DEBUG_FLAG_REFCNT, + "%s: sr_rec %p evchan %d ev_refcnt %" PRId32 + " epoll_fd %d control fd pair (%d:%d) unhook event %p", + __func__, + sr_rec, sr_rec->id_k, sr_rec->ev_refcnt, + sr_rec->ev_u.epoll.epoll_fd, + sr_rec->sv[0], sr_rec->sv[1], &sr_rec->ev_u.epoll.ctrl_ev); + } + } +#endif + + if (sr_rec->sv[0] >= 0) { + close(sr_rec->sv[0]); + sr_rec->sv[0] = -1; + } + + if (sr_rec->sv[1] >= 0) { + close(sr_rec->sv[1]); + sr_rec->sv[1] = -1; + } + +#if defined(TIRPC_EPOLL) + if (sr_rec->ev_u.epoll.epoll_fd > 0) { + close(sr_rec->ev_u.epoll.epoll_fd); + sr_rec->ev_u.epoll.epoll_fd = -1; + } +#endif +} + struct svc_rqst_set { mutex_t mtx; struct svc_rqst_rec *srr; @@ -160,6 +224,8 @@ SetNonBlock(int fd) void svc_rqst_init(uint32_t channels) { + int i; + mutex_lock(&svc_rqst_set.mtx); if (svc_rqst_set.srr) @@ -169,6 +235,10 @@ svc_rqst_init(uint32_t channels) svc_rqst_set.next_id = channels; svc_rqst_set.srr = mem_zalloc(channels * sizeof(struct svc_rqst_rec)); + for (i = 0; i < channels; i++) { + svc_rqst_rec_init(&svc_rqst_set.srr[i]); + } + unlock: mutex_unlock(&svc_rqst_set.mtx); } @@ -194,7 +264,8 @@ svc_rqst_lookup_chan(uint32_t chan_id) } /* forward declaration in lieu of moving code {WAS} */ -static void svc_rqst_run_task(struct work_pool_entry *); +static void svc_rqst_epoll_loop(struct work_pool_entry *wpe); +static void svc_complete_task(struct svc_rqst_rec *sr_rec, bool finished); static int svc_rqst_expire_cmpf(const struct opr_rbtree_node *lhs, @@ -222,7 +293,7 @@ svc_rqst_expire_ms(struct timespec *to) /* coarse nsec, not system time */ (void)clock_gettime(CLOCK_MONOTONIC_FAST, &ts); - timespecadd(&ts, to); + timespecadd(&ts, to, &ts); return timespec_ms(&ts); } @@ -230,7 +301,7 @@ void 
svc_rqst_expire_insert(struct clnt_req *cc) { struct cx_data *cx = CX_DATA(cc->cc_clnt); - struct svc_rqst_rec *sr_rec = (struct svc_rqst_rec *)cx->cx_rec->ev_p; + struct svc_rqst_rec *sr_rec = cx->cx_rec->ev_p; struct opr_rbtree_node *nv; cc->cc_expire_ms = svc_rqst_expire_ms(&cc->cc_timeout); @@ -246,6 +317,10 @@ svc_rqst_expire_insert(struct clnt_req *cc) } mutex_unlock(&sr_rec->ev_lock); + __warnx(TIRPC_DEBUG_FLAG_SVC_RQST, + "%s: sv[0] fd %d before ev_sig (sr_rec %p)", + __func__, sr_rec->sv[0], + sr_rec); ev_sig(sr_rec->sv[0], 0); /* send wakeup */ } @@ -259,6 +334,10 @@ svc_rqst_expire_remove(struct clnt_req *cc) opr_rbtree_remove(&sr_rec->call_expires, &cc->cc_rqst); mutex_unlock(&sr_rec->ev_lock); + __warnx(TIRPC_DEBUG_FLAG_SVC_RQST, + "%s: sv[0] fd %d before ev_sig (sr_rec %p)", + __func__, sr_rec->sv[0], + sr_rec); ev_sig(sr_rec->sv[0], 0); /* send wakeup */ } @@ -281,12 +360,28 @@ svc_rqst_expire_task(struct work_pool_entry *wpe) clnt_req_release(cc); } +static inline void +svc_rqst_release(struct svc_rqst_rec *sr_rec) +{ + if (atomic_dec_int32_t(&sr_rec->ev_refcnt) > 0) + return; + + __warnx(TIRPC_DEBUG_FLAG_SVC_RQST, + "%s: remove evchan %d control fd pair (%d:%d)", + __func__, sr_rec->id_k, + sr_rec->sv[0], sr_rec->sv[1]); + + svc_rqst_rec_destroy(sr_rec); +} + int svc_rqst_new_evchan(uint32_t *chan_id /* OUT */, void *u_data, uint32_t flags) { struct svc_rqst_rec *sr_rec; uint32_t n_id; - int code = 0; + int code = 0, i; + work_pool_fun_t fun = NULL; + int32_t ref_rec; mutex_lock(&svc_rqst_set.mtx); if (!svc_rqst_set.next_id) { @@ -297,26 +392,31 @@ svc_rqst_new_evchan(uint32_t *chan_id /* OUT */, void *u_data, uint32_t flags) return (0); } n_id = --(svc_rqst_set.next_id); + sr_rec = &svc_rqst_set.srr[n_id]; - if (atomic_postinc_int32_t(&sr_rec->ev_refcnt) > 0) { + ref_rec = atomic_postinc_int32_t(&sr_rec->ev_refcnt); + + if (ref_rec > 0) { /* already exists */ *chan_id = n_id; mutex_unlock(&svc_rqst_set.mtx); return (0); } + /* Track the references 
we have */ + ref_rec++; + flags |= SVC_RQST_FLAG_EPOLL; /* XXX */ /* create a pair of anonymous sockets for async event channel wakeups */ code = socketpair(AF_UNIX, SOCK_STREAM, 0, sr_rec->sv); if (code) { + code = errno; __warnx(TIRPC_DEBUG_FLAG_ERROR, - "%s: failed creating event signal socketpair (%d)", + "%s: failed creating event signal socketpair (%d) for sr_rec", __func__, code); - ++(svc_rqst_set.next_id); - mutex_unlock(&svc_rqst_set.mtx); - return (code); + goto fail; } /* set non-blocking */ @@ -326,6 +426,7 @@ svc_rqst_new_evchan(uint32_t *chan_id /* OUT */, void *u_data, uint32_t flags) #if defined(TIRPC_EPOLL) if (flags & SVC_RQST_FLAG_EPOLL) { sr_rec->ev_type = SVC_EVENT_EPOLL; + fun = svc_rqst_epoll_loop; /* XXX improve this too */ sr_rec->ev_u.epoll.max_events = @@ -346,25 +447,33 @@ svc_rqst_new_evchan(uint32_t *chan_id /* OUT */, void *u_data, uint32_t flags) mem_free(sr_rec->ev_u.epoll.events, sr_rec->ev_u.epoll.max_events * sizeof(struct epoll_event)); - ++(svc_rqst_set.next_id); - mutex_unlock(&svc_rqst_set.mtx); - return (EINVAL); + code = EINVAL; + goto fail; } /* permit wakeup of threads blocked in epoll_wait, with a * couple of possible semantics */ - sr_rec->ev_u.epoll.ctrl_ev.events = - EPOLLIN | EPOLLRDHUP; + sr_rec->ev_u.epoll.ctrl_ev.events = EPOLLIN | EPOLLRDHUP; sr_rec->ev_u.epoll.ctrl_ev.data.fd = sr_rec->sv[1]; - code = - epoll_ctl(sr_rec->ev_u.epoll.epoll_fd, EPOLL_CTL_ADD, - sr_rec->sv[1], &sr_rec->ev_u.epoll.ctrl_ev); + code = epoll_ctl(sr_rec->ev_u.epoll.epoll_fd, EPOLL_CTL_ADD, + sr_rec->sv[1], &sr_rec->ev_u.epoll.ctrl_ev); if (code == -1) { code = errno; __warnx(TIRPC_DEBUG_FLAG_ERROR, "%s: add control socket failed (%d)", __func__, code); + goto fail; } + + sr_rec->ev_u.epoll.sv1_added = true; + + __warnx(TIRPC_DEBUG_FLAG_SVC_RQST | TIRPC_DEBUG_FLAG_REFCNT, + "%s: sr_rec %p evchan %d ev_refcnt %" PRId32 + " epoll_fd %d EPOLL_CTL_ADD code %d event %p", + __func__, + sr_rec, sr_rec->id_k, ref_rec, + 
sr_rec->ev_u.epoll.epoll_fd, code, + &sr_rec->ev_u.epoll.ctrl_ev); } else { /* legacy fdset (currently unhooked) */ sr_rec->ev_type = SVC_EVENT_FDSET; @@ -373,80 +482,141 @@ svc_rqst_new_evchan(uint32_t *chan_id /* OUT */, void *u_data, uint32_t flags) sr_rec->ev_type = SVC_EVENT_FDSET; #endif - *chan_id = + *chan_id = n_id; + sr_rec->id_k = n_id; sr_rec->ev_flags = flags & SVC_RQST_FLAG_MASK; opr_rbtree_init(&sr_rec->call_expires, svc_rqst_expire_cmpf); - mutex_init(&sr_rec->ev_lock, NULL); - - if (!code) { - atomic_inc_int32_t(&sr_rec->ev_refcnt); - sr_rec->ev_wpe.fun = svc_rqst_run_task; - sr_rec->ev_wpe.arg = u_data; - work_pool_submit(&svc_work_pool, &sr_rec->ev_wpe); - } - mutex_unlock(&svc_rqst_set.mtx); + atomic_inc_int32_t(&sr_rec->ev_refcnt); + ref_rec++; + sr_rec->ev_wpe.fun = fun; + sr_rec->ev_wpe.arg = u_data; + work_pool_submit(&svc_work_pool, &sr_rec->ev_wpe); __warnx(TIRPC_DEBUG_FLAG_SVC_RQST, "%s: create evchan %d control fd pair (%d:%d)", __func__, n_id, sr_rec->sv[0], sr_rec->sv[1]); - return (code); -} -static inline void -svc_rqst_release(struct svc_rqst_rec *sr_rec) -{ - if (atomic_dec_int32_t(&sr_rec->ev_refcnt) > 0) - return; + if (code != 0) { +fail: + /* Release this event channel id */ + ++(svc_rqst_set.next_id); - __warnx(TIRPC_DEBUG_FLAG_SVC_RQST, - "%s: remove evchan %d control fd pair (%d:%d)", - __func__, sr_rec->id_k, - sr_rec->sv[0], sr_rec->sv[1]); + /* Release sr_rec */ + for (i = 0; i < ref_rec; i++) + svc_rqst_release(sr_rec); + } + + mutex_unlock(&svc_rqst_set.mtx); - mutex_destroy(&sr_rec->ev_lock); + return (code); } /* * may be RPC_DPLX_LOCKED, and SVC_XPRT_FLAG_ADDED cleared */ static inline int -svc_rqst_unhook_events(struct rpc_dplx_rec *rec, struct svc_rqst_rec *sr_rec) +svc_rqst_unhook_events(struct rpc_dplx_rec *rec, struct svc_rqst_rec *sr_rec, + uint16_t ev_flags) { int code = EINVAL; +#ifdef USE_LTTNG_NTIRPC + tracepoint(xprt, unhook, __func__, __LINE__, &rec->xprt, ev_flags); +#endif /* USE_LTTNG_NTIRPC */ + 
__warnx(TIRPC_DEBUG_FLAG_SVC_RQST, + "%s: xprt %p fd %d ev_flags%s%s%s%s%s%s%s%s%s", + __func__, &rec->xprt, rec->xprt.xp_fd, + ev_flags & SVC_XPRT_FLAG_ADDED_RECV ? " ADDED_RECV" : "", + ev_flags & SVC_XPRT_FLAG_ADDED_SEND ? " ADDED_SEND" : "", + ev_flags & SVC_XPRT_FLAG_INITIAL ? " INITIAL" : "", + ev_flags & SVC_XPRT_FLAG_INITIALIZED ? " INITIALIZED" : "", + ev_flags & SVC_XPRT_FLAG_CLOSE ? " CLOSE" : "", + ev_flags & SVC_XPRT_FLAG_DESTROYING ? " DESTROYING" : "", + ev_flags & SVC_XPRT_FLAG_RELEASING ? " RELEASING" : "", + ev_flags & SVC_XPRT_FLAG_UREG ? " UREG" : "", + sr_rec->ev_flags & SVC_RQST_FLAG_SHUTDOWN + ? "sr_rec->ev_flags SHUTDOWN" : ""); + switch (sr_rec->ev_type) { #if defined(TIRPC_EPOLL) case SVC_EVENT_EPOLL: { - struct epoll_event *ev = &rec->ev_u.epoll.event; + struct epoll_event *ev; - /* clear epoll vector */ - code = epoll_ctl(sr_rec->ev_u.epoll.epoll_fd, + if (ev_flags & SVC_XPRT_FLAG_ADDED_RECV) { + ev = &rec->ev_u.epoll.event_recv; + + /* clear epoll vector */ + code = epoll_ctl(sr_rec->ev_u.epoll.epoll_fd, EPOLL_CTL_DEL, rec->xprt.xp_fd, ev); - if (code) { - code = errno; - __warnx(TIRPC_DEBUG_FLAG_WARN, - "%s: %p fd %d xp_refcnt %" PRId32 - " sr_rec %p evchan %d ev_refcnt %" PRId32 - " epoll_fd %d control fd pair (%d:%d) unhook failed (%d)", - __func__, rec, rec->xprt.xp_fd, - rec->xprt.xp_refcnt, - sr_rec, sr_rec->id_k, sr_rec->ev_refcnt, - sr_rec->ev_u.epoll.epoll_fd, - sr_rec->sv[0], sr_rec->sv[1], code); - } else { - __warnx(TIRPC_DEBUG_FLAG_SVC_RQST | - TIRPC_DEBUG_FLAG_REFCNT, - "%s: %p fd %d xp_refcnt %" PRId32 - " sr_rec %p evchan %d ev_refcnt %" PRId32 - " epoll_fd %d control fd pair (%d:%d) unhook", - __func__, rec, rec->xprt.xp_fd, - rec->xprt.xp_refcnt, - sr_rec, sr_rec->id_k, sr_rec->ev_refcnt, - sr_rec->ev_u.epoll.epoll_fd, - sr_rec->sv[0], sr_rec->sv[1]); + + if (code) { + code = errno; + __warnx(TIRPC_DEBUG_FLAG_WARN, + "%s: %p fd %d xp_refcnt %" PRId32 + " sr_rec %p evchan %d ev_refcnt %" PRId32 + " epoll_fd %d control 
fd pair (%d:%d) unhook failed (%d)", + __func__, rec, rec->xprt.xp_fd, + rec->xprt.xp_refcnt, + sr_rec, sr_rec->id_k, sr_rec->ev_refcnt, + sr_rec->ev_u.epoll.epoll_fd, + sr_rec->sv[0], sr_rec->sv[1], code); + } else { + __warnx(TIRPC_DEBUG_FLAG_SVC_RQST | + TIRPC_DEBUG_FLAG_REFCNT, + "%s: %p fd %d xp_refcnt %" PRId32 + " sr_rec %p evchan %d ev_refcnt %" PRId32 + " epoll_fd %d control fd pair (%d:%d) unhook event %p", + __func__, rec, rec->xprt.xp_fd, + rec->xprt.xp_refcnt, + sr_rec, sr_rec->id_k, sr_rec->ev_refcnt, + sr_rec->ev_u.epoll.epoll_fd, + sr_rec->sv[0], sr_rec->sv[1], ev); + + atomic_clear_uint16_t_bits( + &rec->xprt.xp_flags, + SVC_XPRT_FLAG_ADDED_RECV); + } + } + + if (ev_flags & SVC_XPRT_FLAG_ADDED_SEND) { + ev = &rec->ev_u.epoll.event_send; + + /* clear epoll vector */ + code = epoll_ctl(sr_rec->ev_u.epoll.epoll_fd, + EPOLL_CTL_DEL, rec->xprt.xp_fd_send, ev); + + if (code) { + code = errno; + __warnx(TIRPC_DEBUG_FLAG_WARN, + "%s: %p fd %d xp_refcnt %" PRId32 + " sr_rec %p evchan %d ev_refcnt %" PRId32 + " epoll_fd %d control fd pair (%d:%d) unhook failed (%d)", + __func__, rec, rec->xprt.xp_fd, + rec->xprt.xp_refcnt, + sr_rec, sr_rec->id_k, sr_rec->ev_refcnt, + sr_rec->ev_u.epoll.epoll_fd, + sr_rec->sv[0], sr_rec->sv[1], code); + } else { + __warnx(TIRPC_DEBUG_FLAG_SVC_RQST | + TIRPC_DEBUG_FLAG_REFCNT, + "%s: %p fd %d xp_refcnt %" PRId32 + " sr_rec %p evchan %d ev_refcnt %" PRId32 + " epoll_fd %d control fd pair (%d:%d) unhook event %p", + __func__, rec, rec->xprt.xp_fd, + rec->xprt.xp_refcnt, + sr_rec, sr_rec->id_k, sr_rec->ev_refcnt, + sr_rec->ev_u.epoll.epoll_fd, + sr_rec->sv[0], sr_rec->sv[1], ev); + + atomic_clear_uint16_t_bits( + &rec->xprt.xp_flags, + SVC_XPRT_FLAG_ADDED_SEND); + close(rec->xprt.xp_fd_send); + rec->xprt.xp_fd_send = -1; + } } break; } @@ -461,63 +631,132 @@ svc_rqst_unhook_events(struct rpc_dplx_rec *rec, struct svc_rqst_rec *sr_rec) } /* - * not locked + * rpc_dplx_rec lock must be held */ int -svc_rqst_rearm_events(SVCXPRT 
*xprt) +svc_rqst_rearm_events_locked(SVCXPRT *xprt, uint16_t ev_flags) { struct rpc_dplx_rec *rec = REC_XPRT(xprt); - struct svc_rqst_rec *sr_rec = (struct svc_rqst_rec *)rec->ev_p; + struct svc_rqst_rec *sr_rec = rec->ev_p; int code = EINVAL; - if (xprt->xp_flags & (SVC_XPRT_FLAG_ADDED | SVC_XPRT_FLAG_DESTROYED)) +#ifdef USE_LTTNG_NTIRPC + tracepoint(xprt, rearm, __func__, __LINE__, xprt, ev_flags); +#endif /* USE_LTTNG_NTIRPC */ + + __warnx(TIRPC_DEBUG_FLAG_SVC_RQST, + "%s: xprt %p fd %d ev_flags%s%s%s%s%s%s%s%s%s", + __func__, xprt, xprt->xp_fd, + ev_flags & SVC_XPRT_FLAG_ADDED_RECV ? " ADDED_RECV" : "", + ev_flags & SVC_XPRT_FLAG_ADDED_SEND ? " ADDED_SEND" : "", + ev_flags & SVC_XPRT_FLAG_INITIAL ? " INITIAL" : "", + ev_flags & SVC_XPRT_FLAG_INITIALIZED ? " INITIALIZED" : "", + ev_flags & SVC_XPRT_FLAG_CLOSE ? " CLOSE" : "", + ev_flags & SVC_XPRT_FLAG_DESTROYING ? " DESTROYING" : "", + ev_flags & SVC_XPRT_FLAG_RELEASING ? " RELEASING" : "", + ev_flags & SVC_XPRT_FLAG_UREG ? " UREG" : "", + sr_rec->ev_flags & SVC_RQST_FLAG_SHUTDOWN + ? "sr_rec->ev_flags SHUTDOWN" : ""); + + if (xprt->xp_flags & (ev_flags | SVC_XPRT_FLAG_DESTROYED)) return (0); /* MUST follow the destroyed check above */ if (sr_rec->ev_flags & SVC_RQST_FLAG_SHUTDOWN) return (0); - rpc_dplx_rli(rec); + /* Currently, can only be called with one of ADDED_RECV or ADDED_SEND, so we + * only need to take one ref. 
*/ + SVC_REF(xprt, SVC_REF_FLAG_NONE); /* assuming success */ - atomic_set_uint16_t_bits(&xprt->xp_flags, SVC_XPRT_FLAG_ADDED); + atomic_set_uint16_t_bits(&xprt->xp_flags, ev_flags); switch (sr_rec->ev_type) { #if defined(TIRPC_EPOLL) case SVC_EVENT_EPOLL: { - struct epoll_event *ev = &rec->ev_u.epoll.event; - - /* set up epoll user data */ - ev->events = EPOLLIN | EPOLLONESHOT; + struct epoll_event *ev; + + if (ev_flags & SVC_XPRT_FLAG_ADDED_RECV) { + ev = &rec->ev_u.epoll.event_recv; + + /* set up epoll user data */ + ev->events = EPOLLIN | EPOLLONESHOT; + + /* rearm in epoll vector */ + code = epoll_ctl(sr_rec->ev_u.epoll.epoll_fd, + EPOLL_CTL_MOD, rec->xprt.xp_fd, ev); + if (code) { + code = errno; + atomic_clear_uint16_t_bits( + &xprt->xp_flags, + SVC_XPRT_FLAG_ADDED_RECV); + __warnx(TIRPC_DEBUG_FLAG_ERROR, + "%s: %p fd %d xp_refcnt %" PRId32 + " sr_rec %p evchan %d ev_refcnt %" PRId32 + " epoll_fd %d control fd pair (%d:%d) rearm failed (%d)", + __func__, rec, rec->xprt.xp_fd, + rec->xprt.xp_refcnt, + sr_rec, sr_rec->id_k, sr_rec->ev_refcnt, + sr_rec->ev_u.epoll.epoll_fd, + sr_rec->sv[0], sr_rec->sv[1], code); + SVC_RELEASE(xprt, SVC_RELEASE_FLAG_NONE); + } else { + __warnx(TIRPC_DEBUG_FLAG_SVC_RQST | + TIRPC_DEBUG_FLAG_REFCNT, + "%s: %p fd %d xp_refcnt %" PRId32 + " sr_rec %p evchan %d ev_refcnt %"PRId32 + " epoll_fd %d control fd pair (%d:%d) rearm event %p", + __func__, rec, rec->xprt.xp_fd, + rec->xprt.xp_refcnt, + sr_rec, sr_rec->id_k, sr_rec->ev_refcnt, + sr_rec->ev_u.epoll.epoll_fd, + sr_rec->sv[0], sr_rec->sv[1], ev); + } + } - /* rearm in epoll vector */ - code = epoll_ctl(sr_rec->ev_u.epoll.epoll_fd, - EPOLL_CTL_MOD, xprt->xp_fd, ev); - if (code) { - code = errno; - atomic_clear_uint16_t_bits(&xprt->xp_flags, - SVC_XPRT_FLAG_ADDED); - __warnx(TIRPC_DEBUG_FLAG_ERROR, - "%s: %p fd %d xp_refcnt %" PRId32 - " sr_rec %p evchan %d ev_refcnt %" PRId32 - " epoll_fd %d control fd pair (%d:%d) rearm failed (%d)", - __func__, rec, rec->xprt.xp_fd, - 
rec->xprt.xp_refcnt, - sr_rec, sr_rec->id_k, sr_rec->ev_refcnt, - sr_rec->ev_u.epoll.epoll_fd, - sr_rec->sv[0], sr_rec->sv[1], code); - } else { - __warnx(TIRPC_DEBUG_FLAG_SVC_RQST | - TIRPC_DEBUG_FLAG_REFCNT, - "%s: %p fd %d xp_refcnt %" PRId32 - " sr_rec %p evchan %d ev_refcnt %" PRId32 - " epoll_fd %d control fd pair (%d:%d) rearm", - __func__, rec, rec->xprt.xp_fd, - rec->xprt.xp_refcnt, - sr_rec, sr_rec->id_k, sr_rec->ev_refcnt, - sr_rec->ev_u.epoll.epoll_fd, - sr_rec->sv[0], sr_rec->sv[1]); + if (ev_flags & SVC_XPRT_FLAG_ADDED_SEND) { + ev = &rec->ev_u.epoll.event_send; /* send slot, as in hook/unhook; never reuse event_recv */ + + /* set up epoll user data */ + ev->data.ptr = rec; + + /* wait for write events, edge triggered, oneshot */ + ev->events = EPOLLONESHOT | EPOLLOUT | EPOLLET; + + /* rearm in epoll vector */ + code = epoll_ctl(sr_rec->ev_u.epoll.epoll_fd, + EPOLL_CTL_MOD, rec->xprt.xp_fd_send, + ev); + + if (code) { + code = errno; + atomic_clear_uint16_t_bits( + &xprt->xp_flags, + SVC_XPRT_FLAG_ADDED_SEND); + __warnx(TIRPC_DEBUG_FLAG_ERROR, + "%s: %p fd %d xp_refcnt %" PRId32 + " sr_rec %p evchan %d ev_refcnt %" PRId32 + " epoll_fd %d control fd pair (%d:%d) rearm failed (%d)", + __func__, rec, rec->xprt.xp_fd, + rec->xprt.xp_refcnt, + sr_rec, sr_rec->id_k, sr_rec->ev_refcnt, + sr_rec->ev_u.epoll.epoll_fd, + sr_rec->sv[0], sr_rec->sv[1], code); + SVC_RELEASE(xprt, SVC_RELEASE_FLAG_NONE); + } else { + __warnx(TIRPC_DEBUG_FLAG_SVC_RQST | + TIRPC_DEBUG_FLAG_REFCNT, + "%s: %p fd %d xp_refcnt %" PRId32 + " sr_rec %p evchan %d ev_refcnt %"PRId32 + " epoll_fd %d control fd pair (%d:%d) rearm event %p", + __func__, rec, rec->xprt.xp_fd, + rec->xprt.xp_refcnt, + sr_rec, sr_rec->id_k, sr_rec->ev_refcnt, + sr_rec->ev_u.epoll.epoll_fd, + sr_rec->sv[0], sr_rec->sv[1], ev); + } } break; } @@ -528,8 +767,6 @@ svc_rqst_rearm_events(SVCXPRT *xprt) break; } /* switch */ - rpc_dplx_rui(rec); - return (code); } @@ -537,49 +774,117 @@ svc_rqst_rearm_events(SVCXPRT *xprt) * RPC_DPLX_LOCKED, and SVC_XPRT_FLAG_ADDED set */ static 
inline int -svc_rqst_hook_events(struct rpc_dplx_rec *rec, struct svc_rqst_rec *sr_rec) +svc_rqst_hook_events(struct rpc_dplx_rec *rec, struct svc_rqst_rec *sr_rec, + uint16_t ev_flags) { int code = EINVAL; +#ifdef USE_LTTNG_NTIRPC + tracepoint(xprt, hook, __func__, __LINE__, &rec->xprt, ev_flags); +#endif /* USE_LTTNG_NTIRPC */ + + __warnx(TIRPC_DEBUG_FLAG_SVC_RQST, + "%s: xprt %p fd %d ev_flags%s%s%s%s%s%s%s%s%s", + __func__, &rec->xprt, rec->xprt.xp_fd, + ev_flags & SVC_XPRT_FLAG_ADDED_RECV ? " ADDED_RECV" : "", + ev_flags & SVC_XPRT_FLAG_ADDED_SEND ? " ADDED_SEND" : "", + ev_flags & SVC_XPRT_FLAG_INITIAL ? " INITIAL" : "", + ev_flags & SVC_XPRT_FLAG_INITIALIZED ? " INITIALIZED" : "", + ev_flags & SVC_XPRT_FLAG_CLOSE ? " CLOSE" : "", + ev_flags & SVC_XPRT_FLAG_DESTROYING ? " DESTROYING" : "", + ev_flags & SVC_XPRT_FLAG_RELEASING ? " RELEASING" : "", + ev_flags & SVC_XPRT_FLAG_UREG ? " UREG" : "", + sr_rec->ev_flags & SVC_RQST_FLAG_SHUTDOWN + ? "sr_rec->ev_flags SHUTDOWN" : ""); + + /* assuming success */ + atomic_set_uint16_t_bits(&rec->xprt.xp_flags, ev_flags); + switch (sr_rec->ev_type) { #if defined(TIRPC_EPOLL) case SVC_EVENT_EPOLL: { - struct epoll_event *ev = &rec->ev_u.epoll.event; - - /* set up epoll user data */ - ev->data.ptr = rec; - - /* wait for read events, level triggered, oneshot */ - ev->events = EPOLLIN | EPOLLONESHOT; + struct epoll_event *ev; + + if (ev_flags & SVC_XPRT_FLAG_ADDED_RECV) { + ev = &rec->ev_u.epoll.event_recv; + + /* set up epoll user data */ + ev->data.ptr = rec; + + /* wait for read events, level triggered, oneshot */ + ev->events = EPOLLONESHOT | EPOLLIN; + + /* add to epoll vector */ + code = epoll_ctl(sr_rec->ev_u.epoll.epoll_fd, + EPOLL_CTL_ADD, rec->xprt.xp_fd, ev); + if (code) { + code = errno; + atomic_clear_uint16_t_bits( + &rec->xprt.xp_flags, + SVC_XPRT_FLAG_ADDED_RECV); + __warnx(TIRPC_DEBUG_FLAG_ERROR, + "%s: %p fd %d xp_refcnt %" PRId32 + " sr_rec %p evchan %d ev_refcnt %" PRId32 + " epoll_fd %d control fd pair 
(%d:%d) direction in hook failed (%d)", + __func__, rec, rec->xprt.xp_fd, + rec->xprt.xp_refcnt, + sr_rec, sr_rec->id_k, sr_rec->ev_refcnt, + sr_rec->ev_u.epoll.epoll_fd, + sr_rec->sv[0], sr_rec->sv[1], code); + } else { + __warnx(TIRPC_DEBUG_FLAG_SVC_RQST | + TIRPC_DEBUG_FLAG_REFCNT, + "%s: %p fd %d xp_refcnt %" PRId32 + " sr_rec %p evchan %d ev_refcnt %" PRId32 + " epoll_fd %d control fd pair (%d:%d) direction in hook event %p", + __func__, rec, rec->xprt.xp_fd, + rec->xprt.xp_refcnt, + sr_rec, sr_rec->id_k, sr_rec->ev_refcnt, + sr_rec->ev_u.epoll.epoll_fd, + sr_rec->sv[0], sr_rec->sv[1], ev); + } + } - /* add to epoll vector */ - code = epoll_ctl(sr_rec->ev_u.epoll.epoll_fd, - EPOLL_CTL_ADD, rec->xprt.xp_fd, ev); - if (code) { - code = errno; - atomic_clear_uint16_t_bits(&rec->xprt.xp_flags, - SVC_XPRT_FLAG_ADDED); - __warnx(TIRPC_DEBUG_FLAG_ERROR, - "%s: %p fd %d xp_refcnt %" PRId32 - " sr_rec %p evchan %d ev_refcnt %" PRId32 - " epoll_fd %d control fd pair (%d:%d) hook failed (%d)", - __func__, rec, rec->xprt.xp_fd, - rec->xprt.xp_refcnt, - sr_rec, sr_rec->id_k, sr_rec->ev_refcnt, - sr_rec->ev_u.epoll.epoll_fd, - sr_rec->sv[0], sr_rec->sv[1], code); - } else { - __warnx(TIRPC_DEBUG_FLAG_SVC_RQST | - TIRPC_DEBUG_FLAG_REFCNT, - "%s: %p fd %d xp_refcnt %" PRId32 - " sr_rec %p evchan %d ev_refcnt %" PRId32 - " epoll_fd %d control fd pair (%d:%d) hook", - __func__, rec, rec->xprt.xp_fd, - rec->xprt.xp_refcnt, - sr_rec, sr_rec->id_k, sr_rec->ev_refcnt, - sr_rec->ev_u.epoll.epoll_fd, - sr_rec->sv[0], sr_rec->sv[1]); + if (ev_flags & SVC_XPRT_FLAG_ADDED_SEND) { + ev = &rec->ev_u.epoll.event_send; + + /* set up epoll user data */ + ev->data.ptr = rec; + + /* wait for write events, edge triggered, oneshot */ + ev->events = EPOLLONESHOT | EPOLLOUT | EPOLLET; + + /* add to epoll vector */ + code = epoll_ctl(sr_rec->ev_u.epoll.epoll_fd, + EPOLL_CTL_ADD, rec->xprt.xp_fd_send, + ev); + if (code) { + code = errno; + atomic_clear_uint16_t_bits( + &rec->xprt.xp_flags, + 
SVC_XPRT_FLAG_ADDED_SEND); + __warnx(TIRPC_DEBUG_FLAG_ERROR, + "%s: %p fd %d xp_refcnt %" PRId32 + " sr_rec %p evchan %d ev_refcnt %" PRId32 + " epoll_fd %d control fd pair (%d:%d) direction out hook failed (%d)", + __func__, rec, rec->xprt.xp_fd, + rec->xprt.xp_refcnt, + sr_rec, sr_rec->id_k, sr_rec->ev_refcnt, + sr_rec->ev_u.epoll.epoll_fd, + sr_rec->sv[0], sr_rec->sv[1], code); + } else { + __warnx(TIRPC_DEBUG_FLAG_SVC_RQST | + TIRPC_DEBUG_FLAG_REFCNT, + "%s: %p fd %d xp_refcnt %" PRId32 + " sr_rec %p evchan %d ev_refcnt %" PRId32 + " epoll_fd %d control fd pair (%d:%d) direction out hook event %p", + __func__, rec, rec->xprt.xp_fd, + rec->xprt.xp_refcnt, + sr_rec, sr_rec->id_k, sr_rec->ev_refcnt, + sr_rec->ev_u.epoll.epoll_fd, + sr_rec->sv[0], sr_rec->sv[1], ev); + } } break; } @@ -590,6 +895,10 @@ svc_rqst_hook_events(struct rpc_dplx_rec *rec, struct svc_rqst_rec *sr_rec) break; } /* switch */ + __warnx(TIRPC_DEBUG_FLAG_SVC_RQST, + "%s: sv[0] fd %d before ev_sig (sr_rec %p)", + __func__, sr_rec->sv[0], + sr_rec); ev_sig(sr_rec->sv[0], 0); /* send wakeup */ return (code); @@ -601,18 +910,121 @@ svc_rqst_hook_events(struct rpc_dplx_rec *rec, struct svc_rqst_rec *sr_rec) static void svc_rqst_unreg(struct rpc_dplx_rec *rec, struct svc_rqst_rec *sr_rec) { - uint16_t xp_flags = atomic_postclear_uint16_t_bits(&rec->xprt.xp_flags, - SVC_XPRT_FLAG_ADDED); + uint16_t xp_flags = + atomic_postclear_uint16_t_bits(&rec->xprt.xp_flags, + SVC_XPRT_FLAG_ADDED_RECV | + SVC_XPRT_FLAG_ADDED_SEND); /* clear events */ - if (xp_flags & SVC_XPRT_FLAG_ADDED) - (void)svc_rqst_unhook_events(rec, sr_rec); + if (xp_flags & (SVC_XPRT_FLAG_ADDED_RECV | SVC_XPRT_FLAG_ADDED_SEND)) + (void)svc_rqst_unhook_events(rec, sr_rec, xp_flags); /* Unlinking after debug message ensures both the xprt and the sr_rec * are still present, as the xprt unregisters before release. 
*/ - rec->ev_p = NULL; - svc_rqst_release(sr_rec); + if (rec->ev_p == sr_rec) { + rec->ev_p = NULL; + svc_rqst_release(sr_rec); + } +} + +void svc_rqst_xprt_send_complete(SVCXPRT *xprt) +{ + struct rpc_dplx_rec *rec = REC_XPRT(xprt); + struct svc_rqst_rec *sr_rec; + + sr_rec = rec->ev_p; + + if (!sr_rec) { + __warnx(TIRPC_DEBUG_FLAG_ERROR, + "%s: %p has no attached sr_rec", + __func__, xprt); + return; + } + + (void)svc_rqst_unhook_events(rec, sr_rec, SVC_XPRT_FLAG_ADDED_SEND); +} + +int +svc_rqst_evchan_write(SVCXPRT *xprt, struct xdr_ioq *xioq, bool has_blocked) +{ + struct rpc_dplx_rec *rec = REC_XPRT(xprt); + struct svc_rqst_rec *sr_rec; + int code; + + __warnx(TIRPC_DEBUG_FLAG_SVC_RQST, + "%s: xprt %p xioq %p has_blocked %s", + __func__, xprt, xioq, has_blocked ? "TRUE" : "FALSE"); + + sr_rec = rec->ev_p; + + if (!sr_rec) { + __warnx(TIRPC_DEBUG_FLAG_ERROR, + "%s: %p has no attached sr_rec", + __func__, xprt); + return (ENOENT); + } + + rec->ev_u.epoll.xioq_send = xioq; + +#if defined(TIRPC_EPOLL) + if (sr_rec->ev_type == SVC_EVENT_EPOLL) { + /* For send we need to dup the xprt fd */ + if (xprt->xp_fd_send == -1) { + xprt->xp_fd_send = dup(xprt->xp_fd); + + if (xprt->xp_fd_send < 0) { + code = errno; + __warnx(TIRPC_DEBUG_FLAG_ERROR, + "%s: failed duplicating fd (%d)", + __func__, code); + return (code); /* rec not locked and no sr_rec ref taken yet: must not run out: */ + } + + __warnx(TIRPC_DEBUG_FLAG_SVC_RQST, + "%s: xp_fd_send fd %d dup of xp_fd %d", + __func__, xprt->xp_fd_send, xprt->xp_fd); + } + } +#endif + + rpc_dplx_rli(rec); + + /* register on event channel */ + if (has_blocked) { + code = svc_rqst_rearm_events_locked(xprt, + SVC_XPRT_FLAG_ADDED_SEND); + } else { + /* svc_rqst_hook_events doesn't take a ref, so take one here */ + SVC_REF(xprt, SVC_REF_FLAG_NONE); + code = svc_rqst_hook_events(rec, sr_rec, + SVC_XPRT_FLAG_ADDED_SEND); + } + + if (code) { + __warnx(TIRPC_DEBUG_FLAG_ERROR, + "%s: failed hooking events (%d)", + __func__, code); + goto out; + } + + atomic_inc_int32_t(&sr_rec->ev_refcnt); + 
work_pool_submit(&svc_work_pool, &sr_rec->ev_wpe); + + __warnx(TIRPC_DEBUG_FLAG_SVC_RQST, + "%s: create evchan write control fd pair (%d:%d)", + __func__, + sr_rec->sv[0], sr_rec->sv[1]); + +out: + + if (code != 0) { + svc_rqst_release(sr_rec); + } + + rpc_dplx_rui(rec); + + return (code); } /* @@ -623,9 +1035,8 @@ svc_rqst_evchan_reg(uint32_t chan_id, SVCXPRT *xprt, uint32_t flags) { struct rpc_dplx_rec *rec = REC_XPRT(xprt); struct svc_rqst_rec *sr_rec; - struct svc_rqst_rec *ev_p; int code; - uint16_t bits = SVC_XPRT_FLAG_ADDED | (flags & SVC_XPRT_FLAG_UREG); + uint16_t bits = SVC_XPRT_FLAG_ADDED_RECV | (flags & SVC_XPRT_FLAG_UREG); if (chan_id == 0) { /* Create a global/legacy event channel */ @@ -642,6 +1053,7 @@ svc_rqst_evchan_reg(uint32_t chan_id, SVCXPRT *xprt, uint32_t flags) } sr_rec = svc_rqst_lookup_chan(chan_id); + if (!sr_rec) { __warnx(TIRPC_DEBUG_FLAG_ERROR, "%s: %p unknown evchan %d", @@ -652,9 +1064,8 @@ svc_rqst_evchan_reg(uint32_t chan_id, SVCXPRT *xprt, uint32_t flags) if (!(flags & RPC_DPLX_LOCKED)) rpc_dplx_rli(rec); - ev_p = (struct svc_rqst_rec *)rec->ev_p; - if (ev_p) { - if (ev_p == sr_rec) { + if (rec->ev_p) { + if (rec->ev_p == sr_rec) { if (!(flags & RPC_DPLX_LOCKED)) rpc_dplx_rui(rec); __warnx(TIRPC_DEBUG_FLAG_SVC_RQST, @@ -662,17 +1073,17 @@ svc_rqst_evchan_reg(uint32_t chan_id, SVCXPRT *xprt, uint32_t flags) __func__, xprt, chan_id); return (0); } - svc_rqst_unreg(rec, ev_p); + __warnx(TIRPC_DEBUG_FLAG_SVC_RQST, + "%s: %p unregistering evchan %d", + __func__, xprt, rec->ev_p->id_k); + svc_rqst_unreg(rec, rec->ev_p); } - /* assuming success */ - atomic_set_uint16_t_bits(&xprt->xp_flags, bits); - /* link from xprt */ rec->ev_p = sr_rec; - /* register on event channel */ - code = svc_rqst_hook_events(rec, sr_rec); + /* register sr_rec on event channel */ + code = svc_rqst_hook_events(rec, sr_rec, bits); if (!(flags & RPC_DPLX_LOCKED)) rpc_dplx_rui(rec); @@ -694,7 +1105,7 @@ svc_rqst_xprt_register(SVCXPRT *newxprt, SVCXPRT *xprt) 
newxprt, SVC_RQST_FLAG_CHAN_AFFINITY); - sr_rec = (struct svc_rqst_rec *) REC_XPRT(xprt)->ev_p; + sr_rec = REC_XPRT(xprt)->ev_p; /* or if parent xprt has no dedicated event channel */ if (!sr_rec) @@ -750,20 +1161,28 @@ svc_rqst_xprt_unregister(SVCXPRT *xprt, uint32_t flags) } /*static*/ void -svc_rqst_xprt_task(struct work_pool_entry *wpe) +svc_rqst_xprt_task_recv(struct work_pool_entry *wpe) { - struct rpc_dplx_rec *rec = - opr_containerof(wpe, struct rpc_dplx_rec, ioq.ioq_wpe); + struct xdr_ioq *ioq = + opr_containerof(wpe, struct xdr_ioq, ioq_wpe); + struct rpc_dplx_rec *rec = ioq->rec; - atomic_clear_uint16_t_bits(&rec->ioq.ioq_s.qflags, IOQ_FLAG_WORKING); + atomic_clear_uint16_t_bits(&ioq->ioq_s.qflags, IOQ_FLAG_WORKING); + +#ifdef USE_LTTNG_NTIRPC + tracepoint(xprt, recv, __func__, __LINE__, + &rec->xprt, + (unsigned int)(rec->xprt.xp_flags & SVC_XPRT_FLAG_DESTROYED), + (unsigned int) rec->xprt.xp_refcnt); +#endif /* USE_LTTNG_NTIRPC */ /* atomic barrier (above) should protect following values */ if (rec->xprt.xp_refcnt > 1 - && !(rec->xprt.xp_flags & SVC_XPRT_FLAG_DESTROYED)) { + && !(rec->xprt.xp_flags & SVC_XPRT_FLAG_DESTROYED)) { /* (idempotent) xp_flags and xp_refcnt are set atomic. * xp_refcnt need more than 1 (this task). 
*/ - (void)clock_gettime(CLOCK_MONOTONIC_FAST, &(rec->recv.ts)); + (void)clock_gettime(CLOCK_MONOTONIC_FAST, &rec->recv.ts); (void)SVC_RECV(&rec->xprt); } @@ -771,6 +1190,105 @@ svc_rqst_xprt_task(struct work_pool_entry *wpe) SVC_RELEASE(&rec->xprt, SVC_RELEASE_FLAG_NONE); } +enum xprt_stat svc_request(SVCXPRT *xprt, XDR *xdrs) +{ + enum xprt_stat stat; + struct svc_req *req = __svc_params->alloc_cb(xprt, xdrs); + struct rpc_dplx_rec *rpc_dplx_rec = REC_XPRT(xprt); + + /* Track the request we are processing */ + rpc_dplx_rec->svc_req = req; + + /* All decode functions basically do a + * return xprt->xp_dispatch.process_cb(req); + */ + stat = SVC_DECODE(req); + + if (stat == XPRT_SUSPEND) { + /* The request is suspended, don't touch the request in any way + * because the resume may already be scheduled and running on + * another thread. + */ + return XPRT_SUSPEND; + } + + if (req->rq_auth) + SVCAUTH_RELEASE(req); + + XDR_DESTROY(req->rq_xdrs); + + __svc_params->free_cb(req, stat); + + return stat; +} + +static void svc_resume_task(struct work_pool_entry *wpe) +{ + struct rpc_dplx_rec *rec = + opr_containerof(wpe, struct rpc_dplx_rec, ioq.ioq_wpe); + struct svc_req *req = rec->svc_req; + SVCXPRT *xprt = &rec->xprt; + enum xprt_stat stat; + + /* Resume the request. */ + stat = req->rq_xprt->xp_resume_cb(req); + + if (stat == XPRT_SUSPEND) { + /* The request is suspended, don't touch the request in any way + * because the resume may already be scheduled and running on + * another thread. 
+ */ + return; + } + + if (req->rq_auth) + SVCAUTH_RELEASE(req); + + XDR_DESTROY(req->rq_xdrs); + + __svc_params->free_cb(req, stat); + + SVC_RELEASE(xprt, SVC_RELEASE_FLAG_NONE); +} + +void svc_resume(struct svc_req *req) +{ + struct rpc_dplx_rec *rpc_dplx_rec = REC_XPRT(req->rq_xprt); + + rpc_dplx_rec->ioq.ioq_wpe.fun = svc_resume_task; + work_pool_submit(&svc_work_pool, &(rpc_dplx_rec->ioq.ioq_wpe)); +} + +/*static*/ void +svc_rqst_xprt_task_send(struct work_pool_entry *wpe) +{ + struct xdr_ioq *ioq = + opr_containerof(wpe, struct xdr_ioq, ioq_wpe); + struct rpc_dplx_rec *rec = ioq->rec; + + atomic_clear_uint16_t_bits(&ioq->ioq_s.qflags, IOQ_FLAG_WORKING); + +#ifdef USE_LTTNG_NTIRPC + tracepoint(xprt, send, __func__, __LINE__, + &rec->xprt, + (unsigned int)(rec->xprt.xp_flags & SVC_XPRT_FLAG_DESTROYED), + (unsigned int) rec->xprt.xp_refcnt); +#endif /* USE_LTTNG_NTIRPC */ + + /* atomic barrier (above) should protect following values */ + if (rec->xprt.xp_refcnt > 1 + && !(rec->xprt.xp_flags & SVC_XPRT_FLAG_DESTROYED)) { + /* (idempotent) xp_flags and xp_refcnt are set atomic. + * xp_refcnt need more than 1 (this task). + */ + svc_ioq_write(&rec->xprt); + } + + /* If tests fail, log non-fatal "WARNING! already destroying!" + */ + SVC_RELEASE(&rec->xprt, SVC_RELEASE_FLAG_NONE); +} + /* * Like __svc_clean_idle but event-type independent. For now no cleanfds. 
*/ @@ -840,11 +1358,13 @@ svc_rqst_clean_idle(int timeout) #ifdef TIRPC_EPOLL -static struct rpc_dplx_rec * +static struct xdr_ioq * svc_rqst_epoll_event(struct svc_rqst_rec *sr_rec, struct epoll_event *ev) { struct rpc_dplx_rec *rec = (struct rpc_dplx_rec *) ev->data.ptr; - uint16_t xp_flags; + uint16_t xp_flags, ev_flag = 0; + struct xdr_ioq *ioq = NULL; + work_pool_fun_t fun; if (unlikely(ev->data.fd == sr_rec->sv[1])) { /* signalled -- there was a wakeup on ctrl_ev (see @@ -862,34 +1382,67 @@ svc_rqst_epoll_event(struct svc_rqst_rec *sr_rec, struct epoll_event *ev) } /* Another task may release transport in parallel. - * Take extra reference now to keep window as small as possible. - * Under normal circumstances, worker task (above) will release. + * We have a ref from being in epoll, but since epoll is one-shot, a new ref + * will be taken when we re-enter epoll. Use this ref for the processor + * without taking another one. */ - SVC_REF(&rec->xprt, SVC_REF_FLAG_NONE); + + __warnx(TIRPC_DEBUG_FLAG_SVC_RQST, + "%s: event %p %08x%s%s rpc_dplx_rec %p (sr_rec %p)", + __func__, ev, ev->events, + ev->events & EPOLLIN ? " RECV" : "", + ev->events & EPOLLOUT ? " SEND" : "", + rec, sr_rec); + + if (ev->events & EPOLLIN) { + /* This is a RECV event */ + ev_flag = SVC_XPRT_FLAG_ADDED_RECV; + ioq = &rec->ioq; + fun = svc_rqst_xprt_task_recv; + } else if (ev->events & EPOLLOUT) { + /* This is a SEND event */ + ev_flag = SVC_XPRT_FLAG_ADDED_SEND; + ioq = rec->ev_u.epoll.xioq_send; + fun = svc_rqst_xprt_task_send; + } else { + /* This is some other event... */ + SVC_RELEASE(&rec->xprt, SVC_RELEASE_FLAG_NONE); + return NULL; + } /* MUST handle flags after reference. * Although another task may unhook, the error is non-fatal. 
*/ - xp_flags = atomic_postclear_uint16_t_bits(&rec->xprt.xp_flags, - SVC_XPRT_FLAG_ADDED); + xp_flags = atomic_postclear_uint16_t_bits(&rec->xprt.xp_flags, ev_flag); __warnx(TIRPC_DEBUG_FLAG_SVC_RQST | TIRPC_DEBUG_FLAG_REFCNT, "%s: %p fd %d xp_refcnt %" PRId32 - " event %d", + " event %08x xp_flags%s%s clear flag%s%s", __func__, rec, rec->xprt.xp_fd, rec->xprt.xp_refcnt, - ev->events); + ev->events, + xp_flags & SVC_XPRT_FLAG_ADDED_RECV ? " ADDED_RECV" : "", + xp_flags & SVC_XPRT_FLAG_ADDED_SEND ? " ADDED_SEND" : "", + ev_flag & SVC_XPRT_FLAG_ADDED_RECV ? " ADDED_RECV" : "", + ev_flag & SVC_XPRT_FLAG_ADDED_SEND ? " ADDED_SEND" : ""); + +#ifdef USE_LTTNG_NTIRPC + tracepoint(xprt, event, __func__, __LINE__, &rec->xprt, xp_flags, + ev_flag); +#endif /* USE_LTTNG_NTIRPC */ if (rec->xprt.xp_refcnt > 1 - && (xp_flags & SVC_XPRT_FLAG_ADDED) - && !(xp_flags & SVC_XPRT_FLAG_DESTROYED) - && !(atomic_postset_uint16_t_bits(&rec->ioq.ioq_s.qflags, - IOQ_FLAG_WORKING) - & IOQ_FLAG_WORKING)) { + && (xp_flags & ev_flag) + && !(xp_flags & SVC_XPRT_FLAG_DESTROYED) + && !(atomic_postset_uint16_t_bits(&ioq->ioq_s.qflags, + IOQ_FLAG_WORKING) + & IOQ_FLAG_WORKING)) { /* (idempotent) xp_flags and xp_refcnt are set atomic. * xp_refcnt need more than 1 (this event). */ - return (rec); + ioq->ioq_wpe.fun = fun; + ioq->rec = rec; + return ioq; } /* Do not return destroyed transports. 
@@ -902,60 +1455,51 @@ svc_rqst_epoll_event(struct svc_rqst_rec *sr_rec, struct epoll_event *ev) /* * not locked */ -static inline bool +static inline struct xdr_ioq * svc_rqst_epoll_events(struct svc_rqst_rec *sr_rec, int n_events) { - struct rpc_dplx_rec *rec = NULL; + struct xdr_ioq *ioq = NULL; int ix = 0; + /* Find the first RECV or SEND event */ while (ix < n_events) { - rec = svc_rqst_epoll_event(sr_rec, - &(sr_rec->ev_u.epoll.events[ix++])); - if (rec) + ioq = svc_rqst_epoll_event(sr_rec, + &sr_rec->ev_u.epoll.events[ix++]); + if (ioq) break; } - if (!rec) { + if (!ioq) { /* continue waiting for events with this task */ - return false; + return NULL; } while (ix < n_events) { - struct rpc_dplx_rec *rec = svc_rqst_epoll_event(sr_rec, + /* Queue up additional RECV or SEND events */ + struct xdr_ioq *ioq = svc_rqst_epoll_event(sr_rec, &(sr_rec->ev_u.epoll.events[ix++])); - if (!rec) - continue; - - rec->ioq.ioq_wpe.fun = svc_rqst_xprt_task; - work_pool_submit(&svc_work_pool, &(rec->ioq.ioq_wpe)); + if (ioq) + work_pool_submit(&svc_work_pool, &ioq->ioq_wpe); } /* submit another task to handle events in order */ atomic_inc_int32_t(&sr_rec->ev_refcnt); work_pool_submit(&svc_work_pool, &sr_rec->ev_wpe); - /* in most cases have only one event, use this hot thread */ - rec->ioq.ioq_wpe.fun = svc_rqst_xprt_task; - svc_rqst_xprt_task(&(rec->ioq.ioq_wpe)); - - /* failsafe idle processing after work task */ - if (atomic_postclear_uint32_t_bits(&wakeups, ~SVC_RQST_WAKEUPS) - > SVC_RQST_WAKEUPS) { - svc_rqst_clean_idle(__svc_params->idle_timeout); - } - - return true; + return ioq; } -static inline bool -svc_rqst_epoll_loop(struct svc_rqst_rec *sr_rec) +static void svc_rqst_epoll_loop(struct work_pool_entry *wpe) { + struct svc_rqst_rec *sr_rec = + opr_containerof(wpe, struct svc_rqst_rec, ev_wpe); struct clnt_req *cc; struct opr_rbtree_node *n; struct timespec ts; int timeout_ms; int expire_ms; int n_events; + bool finished; for (;;) { timeout_ms = SVC_RQST_TIMEOUT_MS; 
@@ -1004,17 +1548,48 @@ svc_rqst_epoll_loop(struct svc_rqst_rec *sr_rec) __func__, sr_rec->ev_u.epoll.epoll_fd, n_events); - return true; + finished = true; + break; } if (n_events > 0) { - atomic_add_uint32_t(&wakeups, n_events); + __warnx(TIRPC_DEBUG_FLAG_SVC_RQST | + TIRPC_DEBUG_FLAG_REFCNT, + "%s: sr_rec %p evchan %d ev_refcnt %" PRId32 + " epoll_fd %d n_events %d", + __func__, + sr_rec, sr_rec->id_k, sr_rec->ev_refcnt, + sr_rec->ev_u.epoll.epoll_fd, n_events); - if (svc_rqst_epoll_events(sr_rec, n_events)) - return false; + atomic_add_uint32_t(&wakeups, n_events); + struct xdr_ioq *ioq; + + ioq = svc_rqst_epoll_events(sr_rec, n_events); + + if (ioq != NULL) { + /* use this hot thread for the first event */ + ioq->ioq_wpe.fun(&ioq->ioq_wpe); + + /* failsafe idle processing after work task */ + if (atomic_postclear_uint32_t_bits( + &wakeups, ~SVC_RQST_WAKEUPS) + > SVC_RQST_WAKEUPS) { + svc_rqst_clean_idle( + __svc_params->idle_timeout); + } + finished = false; + break; + } continue; } if (!n_events) { /* timed out (idle) */ + __warnx(TIRPC_DEBUG_FLAG_SVC_RQST | + TIRPC_DEBUG_FLAG_REFCNT, + "%s: sr_rec %p evchan %d ev_refcnt %" PRId32 + " epoll_fd %d idle", + __func__, + sr_rec, sr_rec->id_k, sr_rec->ev_refcnt, + sr_rec->ev_u.epoll.epoll_fd); atomic_inc_uint32_t(&wakeups); continue; } @@ -1025,45 +1600,31 @@ svc_rqst_epoll_loop(struct svc_rqst_rec *sr_rec) __func__, sr_rec->ev_u.epoll.epoll_fd, n_events); - return true; + finished = true; + break; } } -} -#endif + if (finished) { + __warnx(TIRPC_DEBUG_FLAG_SVC_RQST | + TIRPC_DEBUG_FLAG_REFCNT, + "%s: sr_rec %p evchan %d ev_refcnt %" PRId32 + " epoll_fd %d finished", + __func__, + sr_rec, sr_rec->id_k, sr_rec->ev_refcnt, + sr_rec->ev_u.epoll.epoll_fd); -/* - * No locking, "there can be only one" - */ -static void -svc_rqst_run_task(struct work_pool_entry *wpe) -{ - struct svc_rqst_rec *sr_rec = - opr_containerof(wpe, struct svc_rqst_rec, ev_wpe); - bool finished; + close(sr_rec->ev_u.epoll.epoll_fd); + 
mem_free(sr_rec->ev_u.epoll.events, + sr_rec->ev_u.epoll.max_events * + sizeof(struct epoll_event)); + } - /* enter event loop */ - switch (sr_rec->ev_type) { -#if defined(TIRPC_EPOLL) - case SVC_EVENT_EPOLL: - finished = svc_rqst_epoll_loop(sr_rec); - if (finished) { - close(sr_rec->ev_u.epoll.epoll_fd); - mem_free(sr_rec->ev_u.epoll.events, - sr_rec->ev_u.epoll.max_events * - sizeof(struct epoll_event)); - } - break; + svc_complete_task(sr_rec, finished); +} #endif - default: - finished = true; - /* XXX formerly select/fd_set case, now placeholder for new - * event systems, reworked select, etc. */ - __warnx(TIRPC_DEBUG_FLAG_ERROR, - "%s: unsupported event type", - __func__); - break; - } /* switch */ +static void svc_complete_task(struct svc_rqst_rec *sr_rec, bool finished) +{ if (finished) { /* reference count here should be 2: * 1 svc_rqst_set @@ -1088,11 +1649,12 @@ svc_rqst_thrd_signal(uint32_t chan_id, uint32_t flags) return (ENOENT); } + __warnx(TIRPC_DEBUG_FLAG_SVC_RQST, + "%s: sv[0] fd %d before ev_sig (sr_rec %p) evchan %d", + __func__, sr_rec->sv[0], + sr_rec, chan_id); ev_sig(sr_rec->sv[0], flags); /* send wakeup */ - __warnx(TIRPC_DEBUG_FLAG_ERROR, - "%s: signalled evchan %d", - __func__, chan_id); svc_rqst_release(sr_rec); return (0); } @@ -1108,6 +1670,10 @@ svc_rqst_delete_evchan(uint32_t chan_id) return (ENOENT); } atomic_set_uint16_t_bits(&sr_rec->ev_flags, SVC_RQST_FLAG_SHUTDOWN); + __warnx(TIRPC_DEBUG_FLAG_SVC_RQST, + "%s: sv[0] fd %d before ev_sig (sr_rec %p)", + __func__, sr_rec->sv[0], + sr_rec); ev_sig(sr_rec->sv[0], SVC_RQST_FLAG_SHUTDOWN); svc_rqst_release(sr_rec); diff --git a/src/svc_vc.c b/src/svc_vc.c index 6301cebf9b..38cc99f1bd 100644 --- a/src/svc_vc.c +++ b/src/svc_vc.c @@ -414,6 +414,10 @@ svc_vc_rendezvous(SVCXPRT *xprt) static int n = 1; struct timeval timeval; +#ifdef USE_LTTNG_NTIRPC + tracepoint(xprt, funcin, __func__, __LINE__, xprt); +#endif /* USE_LTTNG_NTIRPC */ + again: len = sizeof(addr); fd = accept(xprt->xp_fd, 
(struct sockaddr *)(void *)&addr, &len); @@ -438,7 +442,7 @@ svc_vc_rendezvous(SVCXPRT *xprt) } return (XPRT_DIED); } - if (unlikely(svc_rqst_rearm_events(xprt))) { + if (unlikely(svc_rqst_rearm_events(xprt, SVC_XPRT_FLAG_ADDED_RECV))) { __warnx(TIRPC_DEBUG_FLAG_ERROR, "%s: %p fd %d svc_rqst_rearm_events failed (will set dead)", __func__, xprt, xprt->xp_fd); @@ -453,8 +457,10 @@ svc_vc_rendezvous(SVCXPRT *xprt) */ newxprt = makefd_xprt(fd, req_xd->sx_dr.sendsz, req_xd->sx_dr.recvsz, &si, SVC_XPRT_FLAG_CLOSE); - if ((!newxprt) || (!(newxprt->xp_flags & SVC_XPRT_FLAG_INITIAL))) + if ((!newxprt) || (!(newxprt->xp_flags & SVC_XPRT_FLAG_INITIAL))) { + close(fd); return (XPRT_DIED); + } svc_vc_override_ops(newxprt, xprt); @@ -507,6 +513,8 @@ svc_vc_rendezvous(SVCXPRT *xprt) if (xprt->xp_dispatch.rendezvous_cb(newxprt) || svc_rqst_xprt_register(newxprt, xprt)) { SVC_DESTROY(newxprt); + /* Was never added to epoll */ + SVC_RELEASE(newxprt, SVC_RELEASE_FLAG_NONE); return (XPRT_DESTROYED); } return (XPRT_IDLE); @@ -534,6 +542,9 @@ svc_vc_destroy_task(struct work_pool_entry *wpe) if ((xp_flags & SVC_XPRT_FLAG_CLOSE) && rec->xprt.xp_fd != RPC_ANYFD) { (void)close(rec->xprt.xp_fd); + __warnx(TIRPC_DEBUG_FLAG_SVC_VC, + "%s: fd %d closed", + __func__, rec->xprt.xp_fd); rec->xprt.xp_fd = RPC_ANYFD; } @@ -659,6 +670,10 @@ svc_vc_recv(SVCXPRT *xprt) u_int flags; int code; +#ifdef USE_LTTNG_NTIRPC + tracepoint(xprt, funcin, __func__, __LINE__, xprt); +#endif /* USE_LTTNG_NTIRPC */ + /* no need for locking, only one svc_rqst_xprt_task() per event. * depends upon svc_rqst_rearm_events() for ordering. 
*/ @@ -683,19 +698,30 @@ svc_vc_recv(SVCXPRT *xprt) __warnx(TIRPC_DEBUG_FLAG_WARN, "%s: %p fd %d recv errno %d (try again)", "svc_vc_wait", xprt, xprt->xp_fd, code); - if (unlikely(svc_rqst_rearm_events(xprt))) { + if (unlikely(svc_rqst_rearm_events( + xprt, + SVC_XPRT_FLAG_ADDED_RECV))) { __warnx(TIRPC_DEBUG_FLAG_ERROR, "%s: %p fd %d svc_rqst_rearm_events failed (will set dead)", "svc_vc_wait", xprt, xprt->xp_fd); SVC_DESTROY(xprt); + code = EINVAL; } +#ifdef USE_LTTNG_NTIRPC + tracepoint(xprt, recv_exit, __func__, __LINE__, + xprt, "EAGAIN", code); +#endif /* USE_LTTNG_NTIRPC */ return SVC_STAT(xprt); } __warnx(TIRPC_DEBUG_FLAG_WARN, "%s: %p fd %d recv errno %d (will set dead)", "svc_vc_wait", xprt, xprt->xp_fd, code); SVC_DESTROY(xprt); +#ifdef USE_LTTNG_NTIRPC + tracepoint(xprt, recv_exit, __func__, __LINE__, + xprt, "ERROR", code); +#endif /* USE_LTTNG_NTIRPC */ return SVC_STAT(xprt); } @@ -704,6 +730,10 @@ svc_vc_recv(SVCXPRT *xprt) "%s: %p fd %d recv closed (will set dead)", "svc_vc_wait", xprt, xprt->xp_fd); SVC_DESTROY(xprt); +#ifdef USE_LTTNG_NTIRPC + tracepoint(xprt, recv_exit, __func__, __LINE__, + xprt, "EMPTY", 0); +#endif /* USE_LTTNG_NTIRPC */ return SVC_STAT(xprt); } @@ -720,9 +750,17 @@ svc_vc_recv(SVCXPRT *xprt) "%s: %p fd %d fragment is zero (will set dead)", __func__, xprt, xprt->xp_fd); SVC_DESTROY(xprt); +#ifdef USE_LTTNG_NTIRPC + tracepoint(xprt, recv_exit, __func__, __LINE__, + xprt, "NO RECORD", 0); +#endif /* USE_LTTNG_NTIRPC */ return SVC_STAT(xprt); } +#ifdef USE_LTTNG_NTIRPC + tracepoint(xprt, recv_frag, __func__, __LINE__, + xprt, xd->sx_fbtbc); +#endif /* USE_LTTNG_NTIRPC */ /* one buffer per fragment */ uv = xdr_ioq_uv_create(xd->sx_fbtbc, flags); (xioq->ioq_uv.uvqh.qcount)++; @@ -741,18 +779,29 @@ svc_vc_recv(SVCXPRT *xprt) __warnx(TIRPC_DEBUG_FLAG_SVC_VC, "%s: %p fd %d recv errno %d (try again)", __func__, xprt, xprt->xp_fd, code); - if (unlikely(svc_rqst_rearm_events(xprt))) { + if (unlikely(svc_rqst_rearm_events( + xprt, + 
SVC_XPRT_FLAG_ADDED_RECV))) { __warnx(TIRPC_DEBUG_FLAG_ERROR, "%s: %p fd %d svc_rqst_rearm_events failed (will set dead)", __func__, xprt, xprt->xp_fd); SVC_DESTROY(xprt); + code = EINVAL; } +#ifdef USE_LTTNG_NTIRPC + tracepoint(xprt, recv_exit, __func__, __LINE__, + xprt, "EAGAIN", code); +#endif /* USE_LTTNG_NTIRPC */ return SVC_STAT(xprt); } __warnx(TIRPC_DEBUG_FLAG_ERROR, "%s: %p fd %d recv errno %d (will set dead)", __func__, xprt, xprt->xp_fd, code); SVC_DESTROY(xprt); +#ifdef USE_LTTNG_NTIRPC + tracepoint(xprt, recv_exit, __func__, __LINE__, + xprt, "ERROR", code); +#endif /* USE_LTTNG_NTIRPC */ return SVC_STAT(xprt); } @@ -761,9 +810,18 @@ svc_vc_recv(SVCXPRT *xprt) "%s: %p fd %d recv closed (will set dead)", __func__, xprt, xprt->xp_fd); SVC_DESTROY(xprt); +#ifdef USE_LTTNG_NTIRPC + tracepoint(xprt, recv_exit, __func__, __LINE__, + xprt, "EMPTY", 0); +#endif /* USE_LTTNG_NTIRPC */ return SVC_STAT(xprt); } +#ifdef USE_LTTNG_NTIRPC + tracepoint(xprt, recv_bytes, __func__, __LINE__, + xprt, xd->sx_fbtbc, rlen); +#endif /* USE_LTTNG_NTIRPC */ + uv->v.vio_tail += rlen; xd->sx_fbtbc -= rlen; @@ -772,12 +830,22 @@ svc_vc_recv(SVCXPRT *xprt) __func__, xprt, xprt->xp_fd, rlen, xd->sx_fbtbc, flags); if (xd->sx_fbtbc || (flags & UIO_FLAG_MORE)) { - if (unlikely(svc_rqst_rearm_events(xprt))) { + if (unlikely(svc_rqst_rearm_events(xprt, + SVC_XPRT_FLAG_ADDED_RECV))) { __warnx(TIRPC_DEBUG_FLAG_ERROR, "%s: %p fd %d svc_rqst_rearm_events failed (will set dead)", __func__, xprt, xprt->xp_fd); SVC_DESTROY(xprt); +#ifndef USE_LTTNG_NTIRPC + } +#else + tracepoint(xprt, recv_exit, __func__, __LINE__, + xprt, "REARM FAILED", -1); + } else { + tracepoint(xprt, recv_exit, __func__, __LINE__, + xprt, "MORE", 0); } +#endif /* USE_LTTNG_NTIRPC */ return SVC_STAT(xprt); } @@ -786,16 +854,24 @@ svc_vc_recv(SVCXPRT *xprt) TAILQ_REMOVE(&rec->ioq.ioq_uv.uvqh.qh, &xioq->ioq_s, q); xdr_ioq_reset(xioq, 0); - if (unlikely(svc_rqst_rearm_events(xprt))) { + if 
(unlikely(svc_rqst_rearm_events(xprt, SVC_XPRT_FLAG_ADDED_RECV))) { __warnx(TIRPC_DEBUG_FLAG_ERROR, "%s: %p fd %d svc_rqst_rearm_events failed (will set dead)", __func__, xprt, xprt->xp_fd); xdr_ioq_destroy(xioq, xioq->ioq_s.qsize); SVC_DESTROY(xprt); +#ifdef USE_LTTNG_NTIRPC + tracepoint(xprt, recv_exit, __func__, __LINE__, + xprt, "REARM FAILED", -1); +#endif /* USE_LTTNG_NTIRPC */ return SVC_STAT(xprt); } - return (__svc_params->request_cb(xprt, xioq->xdrs)); +#ifdef USE_LTTNG_NTIRPC + tracepoint(xprt, recv_exit, __func__, __LINE__, + xprt, "CALLING svc_request", 0); +#endif /* USE_LTTNG_NTIRPC */ + return svc_request(xprt, xioq->xdrs); } static enum xprt_stat @@ -858,18 +934,12 @@ svc_vc_reply(struct svc_req *req) SVCXPRT *xprt = req->rq_xprt; struct xdr_ioq *xioq; - /* XXX Until gss_get_mic and gss_wrap can be replaced with - * iov equivalents, replies with RPCSEC_GSS security must be - * encoded in a contiguous buffer. - * - * Nb, we should probably use getpagesize() on Unix. Need + /* Nb, we should probably use getpagesize() on Unix. Need * an equivalent for Windows. */ xioq = xdr_ioq_create(RPC_MAXDATA_DEFAULT, __svc_params->ioq.send_max + RPC_MAXDATA_DEFAULT, - (req->rq_msg.cb_cred.oa_flavor == RPCSEC_GSS) - ? 
UIO_FLAG_REALLOC | UIO_FLAG_FREE - : UIO_FLAG_FREE); + UIO_FLAG_FREE); if (!xdr_reply_encode(xioq->xdrs, &req->rq_msg)) { __warnx(TIRPC_DEBUG_FLAG_ERROR, diff --git a/src/svc_xprt.c b/src/svc_xprt.c index 745b2164a7..d0bc832121 100644 --- a/src/svc_xprt.c +++ b/src/svc_xprt.c @@ -172,8 +172,12 @@ svc_xprt_lookup(int fd, svc_xprt_setup_t setup) } (*setup)(&xprt); /* zalloc, xp_refcnt = 1 */ xprt->xp_fd = fd; + xprt->xp_fd_send = -1; xprt->xp_flags = SVC_XPRT_FLAG_INITIAL; + /* Get ref for caller */ + SVC_REF(xprt, SVC_REF_FLAG_NONE); + rec = REC_XPRT(xprt); rpc_dplx_rli(rec); if (opr_rbtree_insert(&t->t, &rec->fd_node)) { diff --git a/src/work_pool.c b/src/work_pool.c index ea63602a1f..7a1b53967d 100644 --- a/src/work_pool.c +++ b/src/work_pool.c @@ -55,6 +55,7 @@ #include #include #include +#include #include @@ -149,6 +150,8 @@ work_pool_thread(void *arg) int rc; bool spawn; + rcu_register_thread(); + pthread_cond_init(&wpt->pqcond, NULL); pthread_mutex_lock(&pool->pqh.qmutex); TAILQ_INSERT_TAIL(&pool->wptqh, wpt, wptq); @@ -238,6 +241,7 @@ work_pool_thread(void *arg) __func__, wpt->worker_name); cond_destroy(&wpt->pqcond); mem_free(wpt, sizeof(*wpt)); + rcu_unregister_thread(); return (NULL); } diff --git a/src/xdr_ioq.c b/src/xdr_ioq.c index 35f6ed4a32..eef5efebed 100644 --- a/src/xdr_ioq.c +++ b/src/xdr_ioq.c @@ -53,8 +53,6 @@ #include -static bool xdr_ioq_noop(void) __attribute__ ((unused)); - #define VREC_MAXBUFS 24 static uint64_t next_id; @@ -82,6 +80,10 @@ xdr_ioq_uv_create(size_t size, u_int uio_flags) uv->u.uio_flags = uio_flags; uv->u.uio_references = 1; /* starting one */ + __warnx(TIRPC_DEBUG_FLAG_XDR, + "%s() uv %p size %lu", + __func__, uv, (unsigned long) size); + return (uv); } @@ -188,16 +190,16 @@ xdr_ioq_uv_recycle(struct poolq_head *ioqh, struct poolq_entry *have) void xdr_ioq_uv_release(struct xdr_ioq_uv *uv) { - if (uv->u.uio_refer) { - /* not optional in this case! 
*/ - uv->u.uio_refer->uio_release(uv->u.uio_refer, UIO_FLAG_NONE); - uv->u.uio_refer = NULL; - } - if (!(--uv->u.uio_references)) { if (uv->u.uio_release) { /* handle both xdr_ioq_uv and vio */ uv->u.uio_release(&uv->u, UIO_FLAG_NONE); + } else if (uv->u.uio_flags & UIO_FLAG_REFER) { + /* not optional in this case! */ + __warnx(TIRPC_DEBUG_FLAG_XDR, "Call uio_release"); + uv->u.uio_refer->uio_release(uv->u.uio_refer, + UIO_FLAG_NONE); + mem_free(uv, sizeof(*uv)); } else if (uv->u.uio_flags & UIO_FLAG_FREE) { free_buffer(uv->v.vio_base, ioquv_size(uv)); mem_free(uv, sizeof(*uv)); @@ -327,6 +329,13 @@ xdr_ioq_uv_advance(struct xdr_ioq *xioq) len = ioquv_length(uv); xioq->ioq_uv.plength += len; +#if 0 + __warnx(TIRPC_DEBUG_FLAG_XDR, + "%s() uv %p len %lu plength %lu NEXT %p", + __func__, uv, (unsigned long) len, (unsigned long) xioq->ioq_uv.plength, + IOQ_(TAILQ_NEXT(&uv->uvq, q))); +#endif + /* next buffer, if any */ return IOQ_(TAILQ_NEXT(&uv->uvq, q)); } @@ -574,6 +583,13 @@ xdr_ioq_putbufs(XDR *xdrs, xdr_uio *uio, u_int flags) xdr_vio *v; int ix; + /* update the most recent data length, just in case */ + xdr_tail_update(xdrs); + + __warnx(TIRPC_DEBUG_FLAG_XDR, + "%s Before putbufs - pos %lu", + __func__, (unsigned long) XDR_GETPOS(xdrs)); + for (ix = 0; ix < uio->uio_count; ++ix) { /* advance fill pointer, do not allocate buffers, refs =1 */ uv = xdr_ioq_uv_advance(XIOQ(xdrs)); @@ -583,9 +599,29 @@ xdr_ioq_putbufs(XDR *xdrs, xdr_uio *uio, u_int flags) xdr_ioq_uv_update(XIOQ(xdrs), uv); v = &(uio->uio_vio[ix]); - uv->u.uio_flags = UIO_FLAG_NONE; /* !RECLAIM */ + uv->u.uio_flags = UIO_FLAG_REFER; uv->v = *v; + /* save original buffer sequence for rele */ + uv->u.uio_refer = uio; + (uio->uio_references)++; + + /* Now update the XDR position */ + xdrs->x_data = uv->v.vio_tail; + xdrs->x_base = &uv->v; + xdrs->x_v = uv->v; + + __warnx(TIRPC_DEBUG_FLAG_XDR, + "%s After putbufs Examining xdr_ioq_uv %p (base %p head %p tail %p wrap %p len %lu full %lu) pos %lu", + 
__func__, uv, uv->v.vio_base, uv->v.vio_head, + uv->v.vio_tail, uv->v.vio_wrap, + (unsigned long) ioquv_length(uv), + (unsigned long) (uintptr_t)xdrs->x_v.vio_wrap + - (uintptr_t)xdrs->x_v.vio_head, + (unsigned long) XDR_GETPOS(xdrs)); + } + + return (TRUE); #if 0 Saved for later golden buttery results -- Matt if (flags & XDR_PUTBUFS_FLAG_BRELE) { @@ -617,7 +653,6 @@ Saved for later golden buttery results -- Matt uv->v.vio_head = 0; } } -#endif /* save original buffer sequence for rele */ if (ix == 0) { uv->u.uio_refer = uio; @@ -626,6 +661,7 @@ Saved for later golden buttery results -- Matt } return (TRUE); +#endif } /* @@ -664,13 +700,28 @@ xdr_ioq_setpos(XDR *xdrs, u_int pos) TAILQ_FOREACH(have, &(XIOQ(xdrs)->ioq_uv.uvqh.qh), q) { struct xdr_ioq_uv *uv = IOQ_(have); + struct xdr_ioq_uv *next = IOQ_(TAILQ_NEXT(have, q)); u_int len = ioquv_length(uv); u_int full = (uintptr_t)xdrs->x_v.vio_wrap - (uintptr_t)xdrs->x_v.vio_head; - if (pos <= full) { - /* allow up to the end of the buffer, - * assuming next operation will extend. + __warnx(TIRPC_DEBUG_FLAG_XDR, + "%s Examining xdr_ioq_uv %p (base %p head %p tail %p wrap %p len %lu full %lu) - %s pos %lu", + __func__, uv, uv->v.vio_base, uv->v.vio_head, + uv->v.vio_tail, uv->v.vio_wrap, + (unsigned long) len, (unsigned long) full, + next ? "more" : "last", + (unsigned long) pos); + + /* If we have a next buffer and pos would land exactly at the + * tail of this buffer, we want to force positioning in the + * next buffer. The space between the tail of this buffer and + * the wrap of this buffer is unused and MUST be skipped. + */ + if ((pos < len) || (next == NULL && pos <= full)) { + /* allow up to the end of the buffer, unless there is + * a next buffer in which case only allow up to the + * tail assuming next operation will extend. 
*/ xdrs->x_data = uv->v.vio_head + pos; xdrs->x_base = &uv->v; @@ -682,6 +733,10 @@ xdr_ioq_setpos(XDR *xdrs, u_int pos) XIOQ(xdrs)->ioq_uv.pcount++; } + __warnx(TIRPC_DEBUG_FLAG_XDR, + "%s failing with remaining %lu", + __func__, (unsigned long) pos); + return (false); } @@ -760,9 +815,444 @@ xdr_ioq_control(XDR *xdrs, /* const */ int rq, void *in) } static bool -xdr_ioq_noop(void) +xdr_ioq_newbuf(XDR *xdrs) { - return (false); + struct xdr_ioq_uv *uv; + + /* We need to start a new buffer whether the current buffer is full or + * not. + */ + uv = xdr_ioq_uv_advance(XIOQ(xdrs)); + + if (!uv) + uv = xdr_ioq_uv_append(XIOQ(xdrs), IOQ_FLAG_BALLOC); + else + xdr_ioq_uv_update(XIOQ(xdrs), uv); + + __warnx(TIRPC_DEBUG_FLAG_XDR, + "%s() uv %p", + __func__, uv); + + /* At this point, the position has been updated to point to the + * start of the new buffer since xdr_ioq_uv_update has been called + * (it's called at the end of xdr_ioq_uv_append). + */ + return true; +} + +static int +xdr_ioq_iovcount(XDR *xdrs, u_int start, u_int datalen) +{ + /* Buffers starts at -1 to indicate start has not yet been found */ + int buffers = -1; + struct poolq_entry *have; + struct xdr_ioq_uv *uv; + + /* update the most recent data length, just in case */ + xdr_tail_update(xdrs); + + TAILQ_FOREACH(have, &(XIOQ(xdrs)->ioq_uv.uvqh.qh), q) { + u_int len; + + uv = IOQ_(have); + len = ioquv_length(uv); + + __warnx(TIRPC_DEBUG_FLAG_XDR, + "%s Examining xdr_ioq_uv %p (base %p head %p tail %p wrap %p) - start %lu len %lu buffers %d", + __func__, uv, uv->v.vio_base, uv->v.vio_head, + uv->v.vio_tail, uv->v.vio_wrap, + (unsigned long) start, (unsigned long) len, buffers); + + if (buffers > 0) { + /* Accumulate another buffer */ + buffers++; + __warnx(TIRPC_DEBUG_FLAG_XDR, + "Accumulated another buffer total = %d", + buffers); + } else if (start < len) { + /* We have found the buffer that start begins. 
*/ + buffers = 1; + __warnx(TIRPC_DEBUG_FLAG_XDR, + "Starting total = %d", buffers); + } else { + /* Keep looking, need to reduce start by the length of + * this buffer. + */ + start -= len; + } + if (buffers > 0) { + /* Now we need to decrement the datalen to see if we're + * done. Note the first time we come in, start may not + * be zero, which represents the fact that start was in + * the middle of this buffer, just subtract the + * remaining start from the length of this buffer. + */ + u_int buflen = uv->v.vio_tail - uv->v.vio_head - start; + if (buflen >= datalen) { + /* We have found end. */ + datalen = 0; + break; + } + + /* Decrement the datalen, and zero out start for future + * buffers. + */ + datalen -= buflen; + start = 0; + } + } + + if (datalen != 0) { + /* There wasn't enough data... */ + __warnx(TIRPC_DEBUG_FLAG_XDR, + "%s start %lu remain %lu", + __func__, (unsigned long) start, + (unsigned long) datalen); + return -1; + } + + __warnx(TIRPC_DEBUG_FLAG_XDR, + "%s start %lu buffers %d", + __func__, (unsigned long) start, buffers); + + /* If start was not within the xdr stream, buffers will still be -1 */ + return buffers; +} + +static bool +xdr_ioq_fillbufs(XDR *xdrs, u_int start, xdr_vio *vector, u_int datalen) +{ + bool found = false; + struct poolq_entry *have; + struct xdr_ioq_uv *uv; + int idx = 0; + + /* update the most recent data length, just in case */ + xdr_tail_update(xdrs); + + TAILQ_FOREACH(have, &(XIOQ(xdrs)->ioq_uv.uvqh.qh), q) { + u_int len; + + uv = IOQ_(have); + len = ioquv_length(uv); + + __warnx(TIRPC_DEBUG_FLAG_XDR, + "%s Examining xdr_ioq_uv %p (base %p head %p tail %p wrap %p len %lu) - %s start %lu remain %lu idx %d", + __func__, uv, uv->v.vio_base, uv->v.vio_head, + uv->v.vio_tail, uv->v.vio_wrap, + (unsigned long) len, + found ? "found" : "not found", + (unsigned long) start, (unsigned long) datalen, idx); + + if (!found) { + if (start < len) { + /* We have found the buffer that start begins. 
+ */ + found = true; + __warnx(TIRPC_DEBUG_FLAG_XDR, "found"); + } else { + /* Keep looking, need to reduce start by the + * length of this buffer. + */ + start -= len; + } + } + + if (found) { + vector[idx] = uv->v; + vector[idx].vio_type = VIO_DATA; + + if (start > 0) { + /* The start position wasn't at the start of + * a buffer, adjust the vio_head of this buffer + * and len and then zero out start for + * future buffers. + */ + len -= start; + vector[idx].vio_head += start; + start = 0; + } + + vector[idx].vio_length = len; + + if (datalen < vector[idx].vio_length) { + /* This is the last buffer, and we're not using + * all of it, adjust vio_length and vio_tail. + */ + vector[idx].vio_length = datalen; + vector[idx].vio_tail = vector[idx].vio_head + + datalen; + datalen = 0; + break; + } else if (datalen == vector[idx].vio_length) { + /* We have reached the end. */ + datalen = 0; + break; + } + + datalen -= vector[idx].vio_length; + + idx++; + } + } + + if (datalen != 0) { + /* There wasn't enough data... */ + __warnx(TIRPC_DEBUG_FLAG_XDR, + "%s start %lu remain %lu", + __func__, (unsigned long) start, + (unsigned long) datalen); + return false; + } + + __warnx(TIRPC_DEBUG_FLAG_XDR, + "%s %s start %lu remain %lu idx %d", + __func__, found ? "found" : "not found", + (unsigned long) start, (unsigned long) datalen, idx); + + return found; +} + +static struct xdr_ioq_uv * +xdr_ioq_use_or_allocate(struct xdr_ioq *xioq, xdr_vio *v, struct xdr_ioq_uv *uv) +{ + struct poolq_entry *have = &uv->uvq, *have2; + struct xdr_ioq_uv *uv2; + + /* We have a header or tailer, let's see if it fits in this buffer, + * otherwise allocate and insert a new buffer. 
+ */ + uint32_t htlen = v->vio_length; + + __warnx(TIRPC_DEBUG_FLAG_XDR, + "%s Examining xdr_ioq_uv %p (base %p head %p tail %p wrap %p) size %lu length %lu has %lu looking for %lu", + __func__, uv, uv->v.vio_base, uv->v.vio_head, + uv->v.vio_tail, uv->v.vio_wrap, + (unsigned long) ioquv_size(uv), (unsigned long) ioquv_length(uv), + (unsigned long) ioquv_more(uv), htlen); + + if (ioquv_more(uv) >= htlen) { + /* The HEADER or TRAILER will fit */ + v->vio_base = uv->v.vio_base; + v->vio_head = uv->v.vio_tail; + v->vio_tail = uv->v.vio_tail + htlen; + v->vio_wrap = uv->v.vio_wrap; + + /* Fixup tail of this buffer */ + uv->v.vio_tail = v->vio_tail; + } else { + /* We have to allocate and insert a new buffer */ + if (xioq->ioq_uv.uvq_fetch) { + /** @todo: does this actually work? */ + /* more of the same kind */ + have2 = + xioq->ioq_uv.uvq_fetch( + xioq, uv->u.uio_p1, + "next buffer", 1, + IOQ_FLAG_NONE); + + /* poolq_entry is the top element of xdr_ioq_uv + */ + uv2 = IOQ_(have2); + assert((void *)uv2 == (void *)have2); + } else { + uv2 = xdr_ioq_uv_create(xioq->ioq_uv.min_bsize, + UIO_FLAG_FREE); + have2 = &uv2->uvq; + (xioq->ioq_uv.uvqh.qcount)++; + TAILQ_INSERT_AFTER(&xioq->ioq_uv.uvqh.qh, + have, have2, q); + + /* Advance to new buffer */ + uv = uv2; + have = have2; + } + + /* Now set up for the header in the new buffer */ + v->vio_base = uv->v.vio_base; + v->vio_head = uv->v.vio_head; + v->vio_tail = uv->v.vio_head + htlen; + v->vio_wrap = uv->v.vio_wrap; + + /* Fixup tail of this buffer */ + uv->v.vio_tail = v->vio_tail; + } + + if (v->vio_type == VIO_TRAILER_LEN) { + /* Now that we have buffer space for the trailer len, we can + * peek ahead to the next buffer and get it's length and fill + * the length into the buffer. Note that this buffer is not + * part of the gss_iov. 
+ */ + *((uint32_t *) (v[0].vio_head)) = + (uint32_t) htonl(v[1].vio_length); + } + + __warnx(TIRPC_DEBUG_FLAG_XDR, + "%s Produced xdr_ioq_uv %p (base %p head %p tail %p wrap %p) size %lu length %lu", + __func__, uv, uv->v.vio_base, uv->v.vio_head, + uv->v.vio_tail, uv->v.vio_wrap, + (unsigned long) ioquv_size(uv), + (unsigned long) ioquv_length(uv)); + + return uv; +} + +static bool +xdr_ioq_allochdrs(XDR *xdrs, u_int start, xdr_vio *vector, int iov_count) +{ + bool found = false; + struct xdr_ioq_uv *uv; + int idx = 0; + struct xdr_ioq *xioq = XIOQ(xdrs); + struct poolq_entry *have; + u_int totlen = start; + + /* update the most recent data length, just in case */ + xdr_tail_update(xdrs); + + TAILQ_FOREACH(have, &(XIOQ(xdrs)->ioq_uv.uvqh.qh), q) { + u_int len; + + uv = IOQ_(have); + len = ioquv_length(uv); + + __warnx(TIRPC_DEBUG_FLAG_XDR, + "%s Examining xdr_ioq_uv %p (base %p head %p tail %p wrap %p) - %s start %lu len %lu", + __func__, uv, uv->v.vio_base, uv->v.vio_head, + uv->v.vio_tail, uv->v.vio_wrap, + found ? "found" : "not found", + (unsigned long) start, (unsigned long) len); + + if (start < len) { + /* start is in this buffer, but not at the start. + * This should be the first data buffer. + */ + found = true; + break; + } + + /* Keep looking, need to reduce start by the length of + * this buffer. + */ + start -= len; + + if (start == 0) { + /* We have found the buffer prior to the one + * that begins at start. + */ + __warnx(TIRPC_DEBUG_FLAG_XDR, + "%s found start after %p", + __func__, uv); + found = true; + break; + } + } + + if (!found) { + /* Failure */ + return false; + } + + /* uv and have are the buffer just before start */ + + if (vector[idx].vio_type == VIO_HEADER) { + if (start != 0) { + /* We are leading with a HEADER, but this buffer has + * data beyond start, so we can't insert the HEADER in + * the right place... 
+ */ + __warnx(TIRPC_DEBUG_FLAG_XDR, + "Oops, trying to insert HEADER in the middle of a buffer"); + return false; + } + + /* We have a header, let's see if it fits in this buffer, + * otherwise allocate and insert a new buffer. + */ + __warnx(TIRPC_DEBUG_FLAG_XDR, + "Calling xdr_ioq_use_or_allocate for idx %d for VIO_HEADER", + idx); + + uv = xdr_ioq_use_or_allocate(xioq, &vector[idx], uv); + + /* Record used space */ + totlen += vector[idx].vio_length; + + /* Advance to next (DATA) buffer */ + idx++; + } + + if (start == 0) { + /* We have the buffer prior to the DATA buffer that should be + * at start, so advance to the next buffer so we will now have + * the first DATA buffer. + */ + uv = IOQ_(TAILQ_NEXT(&uv->uvq, q)); + } + + /* Now idx, uv, and have should be the first DATA buffer */ + while (idx < iov_count && vector[idx].vio_type == VIO_DATA) { + /* Advance to next buffer */ + have = TAILQ_NEXT(have, q); + + __warnx(TIRPC_DEBUG_FLAG_XDR, + "Skipping idx %d for VIO_DATA", + idx); + + /* Record used space */ + totlen += vector[idx].vio_length; + + if (have != NULL) { + /* Next buffer exists */ + uv = IOQ_(have); + } /* else leave the last DATA buffer */ + + idx++; + } + + /* Now idx, uv, and have are the last DATA buffer */ + + while (idx < iov_count) { + /* Another TRAILER buffer to manage */ + vio_type vt = vector[idx].vio_type; + + __warnx(TIRPC_DEBUG_FLAG_XDR, + "Calling xdr_ioq_use_or_allocate for idx %d for %s", + idx, + vt == VIO_HEADER ? "VIO_HEADER" + : vt == VIO_DATA ? "VIO_DATA" + : vt == VIO_TRAILER_LEN ? "VIO_TRAILER_LEN" + : vt == VIO_TRAILER ? 
"VIO_TRAILER" + : "UNKNOWN"); + + if (vt != VIO_TRAILER && vt != VIO_TRAILER_LEN) { + __warnx(TIRPC_DEBUG_FLAG_XDR, + "Oops, buffer other than a trailer found after all data"); + return false; + } + + if (vt == VIO_TRAILER_LEN && + ((idx + 1) == iov_count || + vector[idx + 1].vio_type != VIO_TRAILER)) { + __warnx(TIRPC_DEBUG_FLAG_XDR, + "Oops, VIO_TRAILER_LEN not followed by VIO_TRAILER"); + return false; + } + + uv = xdr_ioq_use_or_allocate(xioq, &vector[idx], uv); + + /* Record used space */ + totlen += vector[idx].vio_length; + + /* Next vector buffer */ + idx++; + } + + /* Update position to end of the last buffer */ + XDR_SETPOS(xdrs, totlen); + + return true; } const struct xdr_ops xdr_ioq_ops = { @@ -775,5 +1265,9 @@ const struct xdr_ops xdr_ioq_ops = { xdr_ioq_destroy_internal, xdr_ioq_control, xdr_ioq_getbufs, - xdr_ioq_putbufs + xdr_ioq_putbufs, + xdr_ioq_newbuf, /* x_newbuf */ + xdr_ioq_iovcount, /* x_iovcount */ + xdr_ioq_fillbufs, /* x_fillbufs */ + xdr_ioq_allochdrs, /* x_allochdrs */ }; diff --git a/src/xdr_mem.c b/src/xdr_mem.c index 71ec90ad50..318193ba24 100644 --- a/src/xdr_mem.c +++ b/src/xdr_mem.c @@ -57,6 +57,7 @@ typedef bool (*dummyfunc3)(XDR *, int, void *); typedef bool (*dummy_getbufs)(XDR *, xdr_uio *, u_int); typedef bool (*dummy_putbufs)(XDR *, xdr_uio *, u_int); +typedef bool (*dummy_newbuf)(struct rpc_xdr *); static const struct xdr_ops xdrmem_ops_aligned; @@ -174,6 +175,79 @@ xdrmem_noop(void) return (false); } +static int +xdrmem_iovcount(XDR *xdrs, u_int start, u_int datalen) +{ + if ((xdrs->x_v.vio_head + start + datalen) > xdrs->x_v.vio_tail) { + /* start and datalen reference outside the size of the data + * in the buffer. + */ + return -1; + } + + return 1; +} + +static bool +xdrmem_fillbufs(XDR *xdrs, u_int start, xdr_vio *vector, u_int datalen) +{ + if ((xdrs->x_v.vio_head + start + datalen) > xdrs->x_v.vio_tail) { + /* start and datalen reference outside the size of the data + * in the buffer. 
+ */ + return false; + } + + vector[0] = xdrs->x_v; + vector[0].vio_type = VIO_DATA; + vector[0].vio_length = vector[0].vio_tail - vector[0].vio_head; + return true; +} + +static bool +xdrmem_allochdrs(XDR *xdrs, u_int start, xdr_vio *vector, int iov_count) +{ + int i; + bool found_data = false; + uint8_t *current = xdrs->x_data; + + for (i = 0; i < iov_count; i++) { + /* If we have found_data and we find another VIO_DATA oops... + * If we have not found_data and we find a non-VIO_DATA oops... + * This simplifies to a single test... + */ + if (found_data == (vector[i].vio_type != VIO_DATA)) { + /* We are being called with a vector we can't support. + * Fixup xdrs and leave. + */ + xdrs->x_data = current; + return false; + } + + if (vector[i].vio_type != VIO_DATA) { + /* Append a reserved buffer for this */ + uint8_t *future = xdrs->x_data + vector[i].vio_length; + + if (future > xdrs->x_v.vio_wrap) { + /* Not enough space, fixup xdrs and leave */ + xdrs->x_data = current; + return false; + } + vector[i].vio_base = xdrs->x_v.vio_base; + vector[i].vio_head = xdrs->x_data; + vector[i].vio_tail = future; + vector[i].vio_wrap = xdrs->x_v.vio_wrap; + xdrs->x_data = future; + } else { + found_data = true; + } + } + + /* update the most recent data length */ + xdr_tail_update(xdrs); + return true; +} + static const struct xdr_ops xdrmem_ops_aligned = { xdrmem_getunit, xdrmem_putunit, @@ -185,4 +259,8 @@ static const struct xdr_ops xdrmem_ops_aligned = { (dummyfunc3) xdrmem_noop, /* x_control */ (dummy_getbufs) xdrmem_noop, /* x_getbufs */ (dummy_putbufs) xdrmem_noop, /* x_putbufs */ + (dummy_newbuf) xdrmem_noop, /* x_newbuf */ + xdrmem_iovcount, /* x_iovcount */ + xdrmem_fillbufs, /* x_fillbufs */ + xdrmem_allochdrs, /* x_allochdrs */ }; diff --git a/src/xdr_rdma.c b/src/xdr_rdma.c index 0e1394f888..e7f4dfe549 100644 --- a/src/xdr_rdma.c +++ b/src/xdr_rdma.c @@ -338,7 +338,7 @@ xdr_rdma_wrap_callback(struct rpc_rdma_cbc *cbc, RDMAXPRT *xprt) { XDR *xdrs = cbc->holdq.xdrs; 
- return (int)__svc_params->request_cb(&xprt->sm_dr.xprt, xdrs); + return (int)svc_request(&xprt->sm_dr.xprt, xdrs); } /***********************************/ diff --git a/tests/rpcping.c b/tests/rpcping.c index 61974e4532..782bd4da26 100644 --- a/tests/rpcping.c +++ b/tests/rpcping.c @@ -194,26 +194,23 @@ worker(void *arg) return NULL; } -static enum xprt_stat -decode_request(SVCXPRT *xprt, XDR *xdrs) +static struct svc_req * +alloc_request(SVCXPRT *xprt, XDR *xdrs) { struct svc_req *req = calloc(1, sizeof(*req)); - enum xprt_stat stat; SVC_REF(xprt, SVC_REF_FLAG_NONE); req->rq_xprt = xprt; req->rq_xdrs = xdrs; req->rq_refcnt = 1; - stat = SVC_DECODE(req); - - if (req->rq_auth) - SVCAUTH_RELEASE(req); + return req; +} - XDR_DESTROY(req->rq_xdrs); - SVC_RELEASE(xprt, SVC_RELEASE_FLAG_NONE); +static void +free_request(struct svc_req *req, enum xprt_stat stat) +{ free(req); - return stat; } static void usage() @@ -315,7 +312,8 @@ int main(int argc, char *argv[]) } memset(&svc_params, 0, sizeof(svc_params)); - svc_params.request_cb = decode_request; + svc_params.alloc_cb = alloc_request; + svc_params.free_cb = free_request; svc_params.flags = SVC_INIT_EPOLL | SVC_INIT_NOREG_XPRTS; svc_params.max_events = 512; svc_params.ioq_thrd_max = nworkers; diff --git a/version-h.in.cmake b/version-h.in.cmake index e7a859e5fb..7c8c75c7de 100644 --- a/version-h.in.cmake +++ b/version-h.in.cmake @@ -5,9 +5,9 @@ #define NTIRPC_VERSION_MAJOR @NTIRPC_MAJOR_VERSION@ #define NTIRPC_VERSION_MINOR @NTIRPC_MINOR_VERSION@ -#define NTIRPC_PATCH_LEVEL @NTIRPC_PATCH_LEVEL@ +#define NTIRPC_VERSION_EXTRA @NTIRPC_VERSION_EXTRA@ -#define NTIRPC_VERSION "@NTIRPC_MAJOR_VERSION@.@NTIRPC_MINOR_VERSION@.@NTIRPC_PATCH_LEVEL@" +#define NTIRPC_VERSION "@NTIRPC_VERSION@" #define NTIRPC_VERSION_COMMENT "@VERSION_COMMENT@" #define _GIT_HEAD_COMMIT "@_GIT_HEAD_COMMIT@" #define _GIT_DESCRIBE "@_GIT_DESCRIBE@"