Cdiff svc_rdma.c
*** /webrev/webrev/usr/src/uts/common/rpc/svc_rdma.c-   Mon Aug 14 13:12:12 2006
--- svc_rdma.c  Thu Aug 10 14:22:04 2006

*** 29,38 ****
--- 29,51 ----
   * Portions of this source code were derived from Berkeley
   * 4.3 BSD under license from the Regents of the University of
   * California.
   */
+ /* Copyright (c) 2006, The Ohio State University. All rights reserved.
+  *
+  * Portions of this source code are developed by the team members of
+  * The Ohio State University's Network-Based Computing Laboratory (NBCL),
+  * headed by Professor Dhabaleswar K. (DK) Panda.
+  *
+  * Acknowledgements for contributions from developers:
+  * Ranjit Noronha: noronha@cse.ohio-state.edu
+  * Lei Chai:       chail@cse.ohio-state.edu
+  * Weikuan Yu:     yuw@cse.ohio-state.edu
+  *
+  */
+ 
  #pragma ident	"@(#)svc_rdma.c	1.8	05/06/10 SMI"
  
  /*
   * Server side of RPC over RDMA in the kernel.
   */
*** 64,73 ****
--- 77,96 ----
  #include <inet/common.h>
  #include <inet/ip.h>
  #include <inet/ip6.h>
+ #include <nfs/nfs.h>
+ 
+ #define	SVC_RDMA_SUCCESS 0
+ #define	SVC_RDMA_FAIL -1
+ 
+ #define	SVC_CREDIT_FACTOR (0.5)
+ 
+ uint32_t rdma_bufs_granted = RDMA_BUFS_GRANT;
+ extern xdrproc_t x_READ3res;
+ 
  /*
   * RDMA transport specific data associated with SVCMASTERXPRT
   */
  struct rdma_data {
  	SVCMASTERXPRT	*rd_xprt;	/* back ptr to SVCMASTERXPRT */
*** 79,88 ****
--- 102,113 ----
   * Plugin connection specific data stashed away in clone SVCXPRT
   */
  struct clone_rdma_data {
  	CONN		*conn;		/* RDMA connection */
  	rdma_buf_t	rpcbuf;		/* RPC req/resp buffer */
+ 	struct clist	*reply_cl;	/* reply chunk buffer info */
+ 	struct clist	*wlist;		/* write list clist */
  };
  #ifdef DEBUG
  int rdma_svc_debug = 0;
  #endif
*** 105,114 ****
--- 130,157 ----
  static void	svc_rdma_kfreeres(SVCXPRT *);
  static void	svc_rdma_kclone_destroy(SVCXPRT *);
  static void	svc_rdma_kstart(SVCMASTERXPRT *);
  void		svc_rdma_kstop(SVCMASTERXPRT *);
+ static int	svc_process_wlist(struct clone_rdma_data *, xdrproc_t,
+ 		    caddr_t, int *, unsigned int *);
+ 
+ static int	svc_process_long_reply(SVCXPRT *, CONN *, xdrproc_t,
+ 		    caddr_t, caddr_t vd, XDR **,
+ 		    struct rpc_msg *, bool_t, int *,
+ 		    int *, int *, unsigned int *);
+ 
+ static int	svc_compose_rpcmsg(SVCXPRT *, CONN *, xdrproc_t, caddr_t,
+ 		    rdma_buf_t *, XDR **, struct rpc_msg *,
+ 		    bool_t, int *, unsigned int *);
+ 
+ #ifdef DYNAMIC_CREDIT_CONTROL
+ static void	svc_consume_credit(CONN *);
+ static void	svc_compute_credit(CONN *, uint32_t, int, int, int *);
+ static void	svc_update_credit(CONN *, int);
+ static void	svc_grant_credit(CONN *, uint32_t *);
+ #endif
+ 
  /*
   * Server transport operations vector.
   */
  struct svc_ops rdma_svc_ops = {
  	svc_rdma_krecv,		/* Get requests */
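Note on the DYNAMIC_CREDIT_CONTROL prototypes above: the scheme they imply is that a credit is consumed for each received call (svc_consume_credit), a grant is computed from the client's requested credit and the buffers currently available (svc_compute_credit, fed by RDMA_GET_RESOURCE_INFO), that many receive descriptors are posted, svc_update_credit records the new grant, and svc_grant_credit advertises it back in the reply header. The following is a minimal user-space sketch of the grant arithmetic whose kernel body appears later in this diff; the name compute_grant() and the integer division standing in for SVC_CREDIT_FACTOR (0.5) are illustrative assumptions, since kernel code normally avoids floating point.

/*
 * Hedged, user-space model of the svc_compute_credit() arithmetic.
 * Integer /2 approximates SVC_CREDIT_FACTOR; this is not the patch code.
 */
#include <stdio.h>
#include <stdint.h>

static int
compute_grant(uint32_t requested, int posted, int numclnts, int availbufs)
{
	int average, grant, to_post;

	if (numclnts == 0)
		return (0);			/* no active clients */

	average = availbufs / numclnts;		/* fair share per client */

	if (requested <= (uint32_t)posted)
		grant = posted;			/* already covered */
	else if (requested <= (uint32_t)average)
		grant = requested;		/* fair share satisfies request */
	else
		grant = average + (requested - average) / 2; /* ~SVC_CREDIT_FACTOR */

	to_post = grant - posted;		/* extra receives to post now */
	if (to_post < 0)
		to_post = 0;
	if (to_post > availbufs)
		to_post = availbufs / 2;	/* never exhaust the buffer pool */
	return (to_post);
}

int
main(void)
{
	/* e.g. client asks for 32 credits, 8 posted, 4 clients, 64 free buffers */
	printf("post %d more receives\n", compute_grant(32, 8, 4, 64));
	return (0);
}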
*** 119,129 ****
  	svc_rdma_kdup,		/* Check entry in dup req cache */
  	svc_rdma_kdupdone,	/* Mark entry in dup req cache as done */
  	svc_rdma_kgetres,	/* Get pointer to response buffer */
  	svc_rdma_kfreeres,	/* Destroy pre-serialized response header */
  	svc_rdma_kclone_destroy, /* Destroy a clone xprt */
! 	svc_rdma_kstart		/* Tell `ready-to-receive' to rpcmod */
  };
  
  /*
   * Server statistics
   * NOTE: This structure type is duplicated in the NFS fast path.
--- 162,173 ----
  	svc_rdma_kdup,		/* Check entry in dup req cache */
  	svc_rdma_kdupdone,	/* Mark entry in dup req cache as done */
  	svc_rdma_kgetres,	/* Get pointer to response buffer */
  	svc_rdma_kfreeres,	/* Destroy pre-serialized response header */
  	svc_rdma_kclone_destroy, /* Destroy a clone xprt */
! 	svc_rdma_kstart,	/* Tell `ready-to-receive' to rpcmod */
! 	rdma_get_wchunk_seg
  };
  
  /*
   * Server statistics
   * NOTE: This structure type is duplicated in the NFS fast path.
*** 170,179 ****
--- 214,227 ----
  	struct rdma_data *rd;
  	rdma_registry_t *rmod;
  	rdma_xprt_record_t *xprt_rec;
  	queue_t	*q;
  
+ 	mutex_enter(&rdma_modload_lock);
+ 	error = rdma_modload();
+ 	mutex_exit(&rdma_modload_lock);
+ 
  	/*
  	 * modload the RDMA plugins is not already done.
  	 */
  	if (!rdma_modloaded) {
  		mutex_enter(&rdma_modload_lock);
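Note on the hunk above: it takes rdma_modload_lock and calls rdma_modload() unconditionally, immediately ahead of the pre-existing `if (!rdma_modloaded)` block, which takes the same lock and appears to handle the modload itself in the original code; the diff does not say whether both paths are meant to stay. Purely as an illustration, a single guarded sequence could look like the user-space sketch below, where pthread mutexes and a stubbed rdma_modload() stand in for the kernel primitives and svc_rdma_load_once() is a made-up wrapper name.

/*
 * Hedged sketch of a single check-under-lock modload path.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t rdma_modload_lock = PTHREAD_MUTEX_INITIALIZER;
static int rdma_modloaded;		/* set once the plugin is loaded */

static int
rdma_modload(void)
{
	rdma_modloaded = 1;		/* stub: pretend the modload worked */
	return (0);
}

static int
svc_rdma_load_once(void)
{
	int error = 0;

	pthread_mutex_lock(&rdma_modload_lock);
	if (!rdma_modloaded)		/* load the RDMA plugin only once */
		error = rdma_modload();
	pthread_mutex_unlock(&rdma_modload_lock);
	return (error);
}

int
main(void)
{
	printf("first call: %d, loaded=%d\n", svc_rdma_load_once(), rdma_modloaded);
	printf("second call: %d, loaded=%d\n", svc_rdma_load_once(), rdma_modloaded);
	return (0);
}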
*** 363,957 **** static bool_t svc_rdma_krecv(SVCXPRT *clone_xprt, mblk_t *mp, struct rpc_msg *msg) { XDR *xdrs; - rdma_stat status; - struct recv_data *rdp = (struct recv_data *)mp->b_rptr; CONN *conn; struct clone_rdma_data *vd; ! struct clist *cl; ! uint_t vers, op, pos; ! uint32_t xid; vd = (struct clone_rdma_data *)clone_xprt->xp_p2buf; RSSTAT_INCR(rscalls); conn = rdp->conn; ! /* ! * Post a receive descriptor on this ! * endpoint to ensure all packets are received. ! */ status = rdma_svc_postrecv(conn); if (status != RDMA_SUCCESS) { cmn_err(CE_NOTE, "svc_rdma_krecv: rdma_svc_postrecv failed %d", status); } - if (rdp->status != 0) { - RDMA_BUF_FREE(conn, &rdp->rpcmsg); - RDMA_REL_CONN(conn); - RSSTAT_INCR(rsbadcalls); - freeb(mp); - return (FALSE); - } - - /* - * Decode rpc message - */ xdrs = &clone_xprt->xp_xdrin; xdrmem_create(xdrs, rdp->rpcmsg.addr, rdp->rpcmsg.len, XDR_DECODE); - - /* - * Get the XID - */ - /* - * Treat xid as opaque (xid is the first entity - * in the rpc rdma message). - */ xid = *(uint32_t *)rdp->rpcmsg.addr; - /* Skip xid and set the xdr position accordingly. */ XDR_SETPOS(xdrs, sizeof (uint32_t)); if (! xdr_u_int(xdrs, &vers) || ! xdr_u_int(xdrs, &op)) { cmn_err(CE_WARN, "svc_rdma_krecv: xdr_u_int failed"); ! XDR_DESTROY(xdrs); ! RDMA_BUF_FREE(conn, &rdp->rpcmsg); ! RDMA_REL_CONN(conn); ! freeb(mp); ! RSSTAT_INCR(rsbadcalls); ! return (FALSE); } - if (op == RDMA_DONE) { - /* - * Should not get RDMA_DONE - */ - freeb(mp); - XDR_DESTROY(xdrs); - RDMA_BUF_FREE(conn, &rdp->rpcmsg); - RDMA_REL_CONN(conn); - RSSTAT_INCR(rsbadcalls); - return (FALSE); /* no response */ - } ! #ifdef DEBUG ! if (rdma_svc_debug) ! printf("svc_rdma_krecv: recv'd call xid %u\n", xid); #endif ! /* ! * Now decode the chunk list ! */ ! cl = NULL; if (! xdr_do_clist(xdrs, &cl)) { cmn_err(CE_WARN, "svc_rdma_krecv: xdr_do_clist failed"); } /* * A chunk at 0 offset indicates that the RPC call message * is in a chunk. Get the RPC call message chunk. */ if (cl != NULL && op == RDMA_NOMSG) { - struct clist *cllong; /* Long RPC chunk */ /* Remove RPC call message chunk from chunklist */ cllong = cl; cl = cl->c_next; cllong->c_next = NULL; /* Allocate and register memory for the RPC call msg chunk */ cllong->c_daddr = (uint64)(uintptr_t) kmem_alloc(cllong->c_len, KM_SLEEP); if (cllong->c_daddr == NULL) { ! cmn_err(CE_WARN, ! "svc_rdma_krecv: no memory for rpc call"); ! XDR_DESTROY(xdrs); ! RDMA_BUF_FREE(conn, &rdp->rpcmsg); ! RDMA_REL_CONN(conn); ! freeb(mp); ! RSSTAT_INCR(rsbadcalls); ! clist_free(cl); clist_free(cllong); ! return (FALSE); } status = clist_register(conn, cllong, 0); if (status) { ! cmn_err(CE_WARN, ! "svc_rdma_krecv: clist_register failed"); kmem_free((void *)(uintptr_t)cllong->c_daddr, cllong->c_len); ! XDR_DESTROY(xdrs); ! RDMA_BUF_FREE(conn, &rdp->rpcmsg); ! RDMA_REL_CONN(conn); ! freeb(mp); ! RSSTAT_INCR(rsbadcalls); ! clist_free(cl); clist_free(cllong); ! return (FALSE); } /* * Now read the RPC call message in */ status = RDMA_READ(conn, cllong, WAIT); if (status) { ! cmn_err(CE_WARN, ! "svc_rdma_krecv: rdma_read failed %d", status); (void) clist_deregister(conn, cllong, 0); kmem_free((void *)(uintptr_t)cllong->c_daddr, cllong->c_len); ! XDR_DESTROY(xdrs); ! RDMA_BUF_FREE(conn, &rdp->rpcmsg); ! RDMA_REL_CONN(conn); ! freeb(mp); ! RSSTAT_INCR(rsbadcalls); ! clist_free(cl); clist_free(cllong); ! return (FALSE); } - /* - * Sync memory for CPU after DMA - */ - status = clist_syncmem(conn, cllong, 0); ! /* ! * Deregister the chunk ! 
*/ (void) clist_deregister(conn, cllong, 0); - /* - * Setup the XDR for the RPC call message - */ xdrrdma_create(xdrs, (caddr_t)(uintptr_t)cllong->c_daddr, cllong->c_len, 0, cl, XDR_DECODE, conn); vd->rpcbuf.type = CHUNK_BUFFER; vd->rpcbuf.addr = (caddr_t)(uintptr_t)cllong->c_daddr; vd->rpcbuf.len = cllong->c_len; vd->rpcbuf.handle.mrc_rmr = 0; ! ! /* ! * Free the chunk element with the Long RPC details and ! * the message received. ! */ clist_free(cllong); RDMA_BUF_FREE(conn, &rdp->rpcmsg); } else { pos = XDR_GETPOS(xdrs); - - /* - * Now the RPC call message header - */ xdrrdma_create(xdrs, rdp->rpcmsg.addr + pos, rdp->rpcmsg.len - pos, 0, cl, XDR_DECODE, conn); vd->rpcbuf = rdp->rpcmsg; } if (! xdr_callmsg(xdrs, msg)) { cmn_err(CE_WARN, "svc_rdma_krecv: xdr_callmsg failed"); - if (cl != NULL) - clist_free(cl); - XDR_DESTROY(xdrs); - rdma_buf_free(conn, &vd->rpcbuf); - RDMA_REL_CONN(conn); - freeb(mp); RSSTAT_INCR(rsxdrcall); ! RSSTAT_INCR(rsbadcalls); ! return (FALSE); } /* * Point the remote transport address in the service_transport * handle at the address in the request. */ clone_xprt->xp_rtaddr.buf = conn->c_raddr.buf; clone_xprt->xp_rtaddr.len = conn->c_raddr.len; clone_xprt->xp_rtaddr.maxlen = conn->c_raddr.len; - - #ifdef DEBUG - if (rdma_svc_debug) { - struct sockaddr_in *sin4; - char print_addr[INET_ADDRSTRLEN]; - - sin4 = (struct sockaddr_in *)clone_xprt->xp_rtaddr.buf; - bzero(print_addr, INET_ADDRSTRLEN); - (void) inet_ntop(AF_INET, - &sin4->sin_addr, print_addr, INET_ADDRSTRLEN); - cmn_err(CE_NOTE, - "svc_rdma_krecv: remote clnt_addr: %s", print_addr); - } - #endif - clone_xprt->xp_xid = xid; vd->conn = conn; freeb(mp); return (TRUE); } ! /* ! * Send rpc reply. ! */ ! static bool_t ! svc_rdma_ksend(SVCXPRT *clone_xprt, struct rpc_msg *msg) { ! struct clone_rdma_data *vd; ! XDR *xdrs = &(clone_xprt->xp_xdrout), rxdrs; ! int retval = FALSE; ! xdrproc_t xdr_results; ! caddr_t xdr_location; ! bool_t has_args, reg = FALSE; ! uint_t len, op; ! uint_t vers; ! struct clist *cl = NULL, *cle = NULL; ! struct clist *sendlist = NULL; ! int status; ! int msglen; ! rdma_buf_t clmsg, longreply, rpcreply; ! vd = (struct clone_rdma_data *)clone_xprt->xp_p2buf; ! /* ! * If there is a result procedure specified in the reply message, ! * it will be processed in the xdr_replymsg and SVCAUTH_WRAP. ! * We need to make sure it won't be processed twice, so we null ! * it for xdr_replymsg here. ! */ ! has_args = FALSE; ! if (msg->rm_reply.rp_stat == MSG_ACCEPTED && ! msg->rm_reply.rp_acpt.ar_stat == SUCCESS) { ! if ((xdr_results = msg->acpted_rply.ar_results.proc) != NULL) { ! has_args = TRUE; ! xdr_location = msg->acpted_rply.ar_results.where; ! msg->acpted_rply.ar_results.proc = xdr_void; ! msg->acpted_rply.ar_results.where = NULL; } - } ! /* ! * Get the size of the rpc reply message. Need this ! * to determine if the rpc reply message will fit in ! * the pre-allocated RDMA buffers. If the rpc reply ! * message length is greater that the pre-allocated ! * buffers then, a one time use buffer is allocated ! * and registered for this rpc reply. ! */ ! msglen = xdr_sizeof(xdr_replymsg, msg); ! if (has_args && msg->rm_reply.rp_acpt.ar_verf.oa_flavor != RPCSEC_GSS) { ! msglen += xdrrdma_sizeof(xdr_results, xdr_location, ! rdma_minchunk); ! if (msglen > RPC_MSG_SZ) { /* ! * Allocate chunk buffer for rpc reply */ ! rpcreply.type = CHUNK_BUFFER; ! rpcreply.addr = kmem_zalloc(msglen, KM_SLEEP); ! cle = kmem_zalloc(sizeof (*cle), KM_SLEEP); ! cle->c_xdroff = 0; ! cle->c_len = rpcreply.len = msglen; ! 
cle->c_saddr = (uint64)(uintptr_t)rpcreply.addr; ! cle->c_next = NULL; ! xdrrdma_create(xdrs, rpcreply.addr, msglen, ! rdma_minchunk, cle, XDR_ENCODE, NULL); ! op = RDMA_NOMSG; } else { ! /* ! * Get a pre-allocated buffer for rpc reply ! */ ! rpcreply.type = SEND_BUFFER; ! if (RDMA_BUF_ALLOC(vd->conn, &rpcreply)) { ! cmn_err(CE_WARN, ! "svc_rdma_ksend: no free buffers!"); ! return (retval); } ! xdrrdma_create(xdrs, rpcreply.addr, rpcreply.len, ! rdma_minchunk, NULL, XDR_ENCODE, NULL); ! op = RDMA_MSG; } /* ! * Initialize the XDR encode stream. */ ! msg->rm_xid = clone_xprt->xp_xid; ! if (!(xdr_replymsg(xdrs, msg) && ! (!has_args || SVCAUTH_WRAP(&clone_xprt->xp_auth, xdrs, ! xdr_results, xdr_location)))) { ! rdma_buf_free(vd->conn, &rpcreply); ! if (cle) ! clist_free(cle); ! cmn_err(CE_WARN, ! "svc_rdma_ksend: xdr_replymsg/SVCAUTH_WRAP " "failed"); ! goto out; } ! len = XDR_GETPOS(xdrs); } - if (has_args && msg->rm_reply.rp_acpt.ar_verf.oa_flavor == RPCSEC_GSS) { /* ! * For RPCSEC_GSS since we cannot accurately presize the ! * buffer required for encoding, we assume that its going ! * to be a Long RPC to start with. We also create the ! * the XDR stream with min_chunk set to 0 which instructs ! * the XDR layer to not chunk the incoming byte stream. */ - msglen += 2 * MAX_AUTH_BYTES + 2 * sizeof (struct opaque_auth); - msglen += xdr_sizeof(xdr_results, xdr_location); /* ! * Long RPC. Allocate one time use custom buffer. */ ! longreply.type = CHUNK_BUFFER; ! longreply.addr = kmem_zalloc(msglen, KM_SLEEP); ! cle = kmem_zalloc(sizeof (*cle), KM_SLEEP); ! cle->c_xdroff = 0; ! cle->c_len = longreply.len = msglen; ! cle->c_saddr = (uint64)(uintptr_t)longreply.addr; ! cle->c_next = NULL; ! xdrrdma_create(xdrs, longreply.addr, msglen, 0, cle, ! XDR_ENCODE, NULL); ! op = RDMA_NOMSG; /* ! * Initialize the XDR encode stream. */ msg->rm_xid = clone_xprt->xp_xid; ! if (!(xdr_replymsg(xdrs, msg) && ! (!has_args || SVCAUTH_WRAP(&clone_xprt->xp_auth, xdrs, xdr_results, xdr_location)))) { ! if (longreply.addr != xdrs->x_base) { ! longreply.addr = xdrs->x_base; ! longreply.len = xdr_getbufsize(xdrs); } ! rdma_buf_free(vd->conn, &longreply); ! if (cle) ! clist_free(cle); ! cmn_err(CE_WARN, ! "svc_rdma_ksend: xdr_replymsg/SVCAUTH_WRAP " ! "failed"); ! goto out; } /* ! * If we had to allocate a new buffer while encoding ! * then update the addr and len. */ ! if (longreply.addr != xdrs->x_base) { ! longreply.addr = xdrs->x_base; ! longreply.len = xdr_getbufsize(xdrs); } ! len = XDR_GETPOS(xdrs); /* - * If it so happens that the encoded message is after all - * not long enough to be a Long RPC then allocate a - * SEND_BUFFER and copy the encoded message into it. - */ - if (len > RPC_MSG_SZ) { - rpcreply.type = CHUNK_BUFFER; - rpcreply.addr = longreply.addr; - rpcreply.len = longreply.len; - } else { - clist_free(cle); - XDR_DESTROY(xdrs); - /* * Get a pre-allocated buffer for rpc reply */ ! rpcreply.type = SEND_BUFFER; ! if (RDMA_BUF_ALLOC(vd->conn, &rpcreply)) { ! cmn_err(CE_WARN, ! "svc_rdma_ksend: no free buffers!"); ! rdma_buf_free(vd->conn, &longreply); ! return (retval); } ! bcopy(longreply.addr, rpcreply.addr, len); ! xdrrdma_create(xdrs, rpcreply.addr, len, 0, NULL, XDR_ENCODE, NULL); - rdma_buf_free(vd->conn, &longreply); - op = RDMA_MSG; } } ! if (has_args == FALSE) { ! if (msglen > RPC_MSG_SZ) { ! /* ! * Allocate chunk buffer for rpc reply */ ! rpcreply.type = CHUNK_BUFFER; ! rpcreply.addr = kmem_zalloc(msglen, KM_SLEEP); ! cle = kmem_zalloc(sizeof (*cle), KM_SLEEP); ! cle->c_xdroff = 0; ! 
cle->c_len = rpcreply.len = msglen; ! cle->c_saddr = (uint64)(uintptr_t)rpcreply.addr; ! cle->c_next = NULL; ! xdrrdma_create(xdrs, rpcreply.addr, msglen, ! rdma_minchunk, cle, XDR_ENCODE, NULL); ! op = RDMA_NOMSG; ! } else { /* ! * Get a pre-allocated buffer for rpc reply */ ! rpcreply.type = SEND_BUFFER; ! if (RDMA_BUF_ALLOC(vd->conn, &rpcreply)) { ! cmn_err(CE_WARN, ! "svc_rdma_ksend: no free buffers!"); ! return (retval); } - xdrrdma_create(xdrs, rpcreply.addr, rpcreply.len, - rdma_minchunk, NULL, XDR_ENCODE, NULL); - op = RDMA_MSG; } /* ! * Initialize the XDR encode stream. */ ! msg->rm_xid = clone_xprt->xp_xid; ! ! if (!xdr_replymsg(xdrs, msg)) { ! rdma_buf_free(vd->conn, &rpcreply); ! if (cle) ! clist_free(cle); ! cmn_err(CE_WARN, ! "svc_rdma_ksend: xdr_replymsg/SVCAUTH_WRAP " ! "failed"); goto out; } - len = XDR_GETPOS(xdrs); } /* ! * Get clist and a buffer for sending it across */ ! cl = xdrrdma_clist(xdrs); ! clmsg.type = SEND_BUFFER; ! if (RDMA_BUF_ALLOC(vd->conn, &clmsg)) { ! rdma_buf_free(vd->conn, &rpcreply); ! cmn_err(CE_WARN, "svc_rdma_ksend: no free buffers!!"); goto out; } ! /* ! * Now register the chunks in the list ! */ if (cl != NULL) { ! status = clist_register(vd->conn, cl, 1); ! if (status != RDMA_SUCCESS) { ! rdma_buf_free(vd->conn, &clmsg); ! cmn_err(CE_WARN, ! "svc_rdma_ksend: clist register failed"); goto out; } - reg = TRUE; - } ! /* ! * XDR the XID, vers, and op ! */ ! /* ! * Treat xid as opaque (xid is the first entity ! * in the rpc rdma message). ! */ vers = RPCRDMA_VERS; xdrs = &rxdrs; xdrmem_create(xdrs, clmsg.addr, clmsg.len, XDR_ENCODE); ! (*(uint32_t *)clmsg.addr) = msg->rm_xid; /* Skip xid and set the xdr position accordingly. */ ! XDR_SETPOS(xdrs, sizeof (uint32_t)); ! if (! xdr_u_int(xdrs, &vers) || ! ! xdr_u_int(xdrs, &op)) { ! rdma_buf_free(vd->conn, &rpcreply); ! rdma_buf_free(vd->conn, &clmsg); cmn_err(CE_WARN, "svc_rdma_ksend: xdr_u_int failed"); goto out; } /* ! * Now XDR the chunk list */ (void) xdr_do_clist(xdrs, &cl); - clist_add(&sendlist, 0, XDR_GETPOS(xdrs), &clmsg.handle, clmsg.addr, - NULL, NULL); - - if (op == RDMA_MSG) { - clist_add(&sendlist, 0, len, &rpcreply.handle, rpcreply.addr, - NULL, NULL); - } else { - cl->c_len = len; - RSSTAT_INCR(rslongrpcs); - } - /* ! * Send the reply message to the client */ ! if (cl != NULL) { ! status = clist_syncmem(vd->conn, cl, 1); ! if (status != RDMA_SUCCESS) { ! rdma_buf_free(vd->conn, &rpcreply); ! rdma_buf_free(vd->conn, &clmsg); goto out; } - #ifdef DEBUG - if (rdma_svc_debug) - printf("svc_rdma_ksend: chunk response len %d xid %u\n", - cl->c_len, msg->rm_xid); - #endif - /* - * Post a receive buffer because we expect a RDMA_DONE - * message. - */ - status = rdma_svc_postrecv(vd->conn); /* ! * Send the RPC reply message and wait for RDMA_DONE */ ! status = RDMA_SEND_RESP(vd->conn, sendlist, msg->rm_xid); ! if (status != RDMA_SUCCESS) { ! #ifdef DEBUG ! if (rdma_svc_debug) ! cmn_err(CE_NOTE, "svc_rdma_ksend: " ! "rdma_send_resp failed %d", status); ! #endif ! goto out; } ! #ifdef DEBUG ! if (rdma_svc_debug) ! 
printf("svc_rdma_ksend: got RDMA_DONE xid %u\n", msg->rm_xid); #endif - } else { - #ifdef DEBUG - if (rdma_svc_debug) - printf("svc_rdma_ksend: msg response xid %u\n", msg->rm_xid); - #endif - status = RDMA_SEND(vd->conn, sendlist, msg->rm_xid); if (status != RDMA_SUCCESS) { - #ifdef DEBUG - if (rdma_svc_debug) - cmn_err(CE_NOTE, "svc_rdma_ksend: " - "rdma_send failed %d", status); - #endif goto out; } - } retval = TRUE; out: - /* - * Deregister the chunks - */ - if (cl != NULL) { - if (reg) - (void) clist_deregister(vd->conn, cl, 1); - if (op == RDMA_NOMSG) { - /* - * Long RPC reply in chunk. Free it up. - */ - rdma_buf_free(vd->conn, &rpcreply); - } - clist_free(cl); - } /* * Free up sendlist chunks */ if (sendlist != NULL) --- 411,1200 ---- static bool_t svc_rdma_krecv(SVCXPRT *clone_xprt, mblk_t *mp, struct rpc_msg *msg) { XDR *xdrs; CONN *conn; + + struct recv_data *rdp = (struct recv_data *)mp->b_rptr; struct clone_rdma_data *vd; ! struct clist *cl = NULL; ! struct clist *wcl = NULL; ! struct clist *repcl = NULL; ! struct clist *cllong = NULL; + rdma_stat status; + rdma_srv_cred_ctrl_t *cc_info; + + uint32_t vers, op, pos, xid; + uint32_t rdma_credit; + uint32_t wcl_total_length = 0; + bool_t wwl= FALSE; + int i, numclnts, availbufs, to_be_posted; + #ifdef SERVER_REG_CACHE + rib_lrc_entry_t *long_reply_buf = NULL; + #endif vd = (struct clone_rdma_data *)clone_xprt->xp_p2buf; RSSTAT_INCR(rscalls); conn = rdp->conn; ! #ifdef DYNAMIC_CREDIT_CONTROL ! RDMA_GET_RESOURCE_INFO(conn, &numclnts, &availbufs); ! svc_consume_credit(conn); ! #else status = rdma_svc_postrecv(conn); if (status != RDMA_SUCCESS) { cmn_err(CE_NOTE, "svc_rdma_krecv: rdma_svc_postrecv failed %d", status); + goto badrpc_call; } + #endif xdrs = &clone_xprt->xp_xdrin; xdrmem_create(xdrs, rdp->rpcmsg.addr, rdp->rpcmsg.len, XDR_DECODE); xid = *(uint32_t *)rdp->rpcmsg.addr; XDR_SETPOS(xdrs, sizeof (uint32_t)); + if (! xdr_u_int(xdrs, &vers) || + ! xdr_u_int(xdrs, &rdma_credit) || ! xdr_u_int(xdrs, &op)) { cmn_err(CE_WARN, "svc_rdma_krecv: xdr_u_int failed"); ! goto xdr_err; } ! #ifdef DYNAMIC_CREDIT_CONTROL ! svc_compute_credit(conn, rdma_credit, numclnts, availbufs, &to_be_posted); ! for(i=0; i<to_be_posted; i++){ ! status = rdma_svc_postrecv(conn); ! if (status != RDMA_SUCCESS) { ! cmn_err(CE_NOTE, ! "svc_rdma_krecv: rdma_svc_postrecv failed %d", status); ! goto badrpc_call; ! } ! } ! svc_update_credit(conn, to_be_posted); #endif ! ! if (rdp->status != 0) { ! cmn_err(CE_NOTE, ! "svc_rdma_krecv: invalid status %d", ! rdp->status); ! goto badrpc_call; ! } ! if (! xdr_do_clist(xdrs, &cl)) { cmn_err(CE_WARN, "svc_rdma_krecv: xdr_do_clist failed"); + goto xdr_err; } + if (!xdr_decode_wlist_new(xdrs, &wcl, &wwl, &wcl_total_length,conn)) { + cmn_err(CE_NOTE, "svc recv: xdr_decode_wlist failed"); + if (cl) + clist_free(cl); + goto xdr_err; + } + vd->wlist = wcl; + + (void) xdr_decode_reply_wchunk(xdrs, &repcl, conn); + vd->reply_cl = repcl; + /* * A chunk at 0 offset indicates that the RPC call message * is in a chunk. Get the RPC call message chunk. 
*/ if (cl != NULL && op == RDMA_NOMSG) { /* Remove RPC call message chunk from chunklist */ cllong = cl; cl = cl->c_next; cllong->c_next = NULL; /* Allocate and register memory for the RPC call msg chunk */ + #ifdef SERVER_REG_CACHE + long_reply_buf = RDMA_GET_SERVER_CACHE_BUF(conn,cllong->c_len); + cllong->long_reply_buf = (uint64)long_reply_buf; + cllong->c_daddr = (uint64)(uintptr_t) long_reply_buf->lrc_buf; + #else cllong->c_daddr = (uint64)(uintptr_t) kmem_alloc(cllong->c_len, KM_SLEEP); + #endif if (cllong->c_daddr == NULL) { ! cmn_err(CE_WARN, "svc krecv: no memory for rpc call"); clist_free(cllong); ! goto cll_malloc_err; } + status = clist_register(conn, cllong, 0); if (status) { ! cmn_err(CE_WARN, "svc krecv: clist_register failed"); ! #ifdef SERVER_REG_CACHE ! RDMA_FREE_SERVER_CACHE_BUF(conn, (rib_lrc_entry_t *)cllong->long_reply_buf); ! #else ! if(cllong->c_len) kmem_free((void *)(uintptr_t)cllong->c_daddr, cllong->c_len); ! #endif ! if(cllong) clist_free(cllong); ! goto cll_malloc_err; } /* * Now read the RPC call message in */ status = RDMA_READ(conn, cllong, WAIT); if (status) { ! cmn_err(CE_WARN, "svc_rdma_krecv: rdma_read failed"); (void) clist_deregister(conn, cllong, 0); + #ifdef SERVER_REG_CACHE + RDMA_FREE_SERVER_CACHE_BUF(conn, (rib_lrc_entry_t *)cllong->long_reply_buf); + #else kmem_free((void *)(uintptr_t)cllong->c_daddr, cllong->c_len); ! #endif clist_free(cllong); ! goto cll_malloc_err; } ! status = clist_syncmem(conn, cllong, 0); (void) clist_deregister(conn, cllong, 0); xdrrdma_create(xdrs, (caddr_t)(uintptr_t)cllong->c_daddr, cllong->c_len, 0, cl, XDR_DECODE, conn); + vd->rpcbuf.type = CHUNK_BUFFER; vd->rpcbuf.addr = (caddr_t)(uintptr_t)cllong->c_daddr; vd->rpcbuf.len = cllong->c_len; vd->rpcbuf.handle.mrc_rmr = 0; ! #ifdef SERVER_REG_CACHE ! vd->rpcbuf.long_reply_buf = (rib_lrc_entry_t *)cllong->long_reply_buf; ! #endif clist_free(cllong); RDMA_BUF_FREE(conn, &rdp->rpcmsg); } else { pos = XDR_GETPOS(xdrs); xdrrdma_create(xdrs, rdp->rpcmsg.addr + pos, rdp->rpcmsg.len - pos, 0, cl, XDR_DECODE, conn); vd->rpcbuf = rdp->rpcmsg; } + if (! xdr_callmsg(xdrs, msg)) { cmn_err(CE_WARN, "svc_rdma_krecv: xdr_callmsg failed"); RSSTAT_INCR(rsxdrcall); ! goto callmsg_err; } /* + * wlist sent for something besides NFS3 READ, so ignore it. + * FTDO: this isn't appropriate for READLINK3, but our client + * will never drive writelist for READLINK3, so good enough + * for the demo. + */ + if (vd->wlist != NULL && + (msg->rm_call.cb_rpcvers != RPC_MSG_VERSION || + msg->rm_call.cb_prog != NFS3_PROGRAM || + msg->rm_call.cb_vers != NFS_V3 || + msg->rm_call.cb_proc != NFSPROC3_READ)) { + #ifdef SERVER_REG_CACHE + RDMA_FREE_SERVER_CACHE_BUF(conn,(rib_lrc_entry_t *)wcl->long_reply_buf); + #else + kmem_free((void *)wcl->c_saddr, wcl_total_length); + #endif + clist_free(wcl); + vd->wlist = NULL; + } + + /* * Point the remote transport address in the service_transport * handle at the address in the request. 
*/ clone_xprt->xp_rtaddr.buf = conn->c_raddr.buf; clone_xprt->xp_rtaddr.len = conn->c_raddr.len; clone_xprt->xp_rtaddr.maxlen = conn->c_raddr.len; clone_xprt->xp_xid = xid; vd->conn = conn; + freeb(mp); return (TRUE); + callmsg_err: + rdma_buf_free(conn, &vd->rpcbuf); + cll_malloc_err: + if (cl) + clist_free(cl); + if (wcl != NULL) { + #ifdef SERVER_REG_CACHE + RDMA_FREE_SERVER_CACHE_BUF(conn, (rib_lrc_entry_t *)wcl->long_reply_buf); + #else + kmem_free((void *)wcl->c_saddr, wcl_total_length); + #endif + clist_free(wcl); + } + xdr_err: + XDR_DESTROY(xdrs); + badrpc_call: + RDMA_BUF_FREE(conn, &rdp->rpcmsg); + RDMA_REL_CONN(conn); + freeb(mp); + RSSTAT_INCR(rsbadcalls); + return (FALSE); } ! #ifdef DYNAMIC_CREDIT_CONTROL ! static void ! svc_consume_credit(CONN *conn) { ! rdma_srv_cred_ctrl_t *cc_info; ! mutex_enter(&conn->c_lock); ! cc_info = &conn->rdma_conn_cred_ctrl_u.c_srv_cc; ! cc_info->srv_cc_posted--; ! mutex_exit(&conn->c_lock); ! } ! static void ! svc_compute_credit(CONN *conn, uint32_t rdma_credit, int numclnts, ! int availbufs, int *to_be_posted) ! { ! int average, grant; ! rdma_srv_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_srv_cc; ! ! if(numclnts == 0){ ! cmn_err(CE_NOTE, "There is no active client!\n"); ! *to_be_posted = 0; ! return; } ! average = availbufs/numclnts; + mutex_enter(&conn->c_lock); + + if(rdma_credit <= cc_info->srv_cc_posted) + grant = cc_info->srv_cc_posted; + else if(rdma_credit <= average) + grant = rdma_credit; + else + grant = average + (rdma_credit - average) * SVC_CREDIT_FACTOR; + + *to_be_posted = grant - cc_info->srv_cc_posted; + if(*to_be_posted < 0) + *to_be_posted = 0; + if(*to_be_posted > availbufs) + *to_be_posted = availbufs/2; + + mutex_exit(&conn->c_lock); + } + + static void + svc_grant_credit(CONN * conn, uint32_t * rdma_credit) + { + rdma_srv_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_srv_cc; + + mutex_enter(&conn->c_lock); + /* ! * Fill in the granted number of buffers ! * for credit control. ! * ! * XXX Currently ignoring what the client sends. */ ! *rdma_credit = cc_info->srv_cc_buffers_granted; ! mutex_exit(&conn->c_lock); ! } ! ! static void ! svc_update_credit(CONN * conn, int i) ! { ! rdma_srv_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_srv_cc; ! ! mutex_enter(&conn->c_lock); ! cc_info->srv_cc_buffers_granted = cc_info->srv_cc_posted + i; ! cc_info->srv_cc_posted = cc_info->srv_cc_buffers_granted; ! mutex_exit(&conn->c_lock); ! } ! #endif ! ! static int ! svc_process_wlist(struct clone_rdma_data *vd, xdrproc_t xdr_results, ! caddr_t xdr_location, int *num_wsegment, ! unsigned int *templen) ! { ! struct clist *wcl; ! int data_len, avail_len, num, status; ! READ3resok *rok; ! ! rok = &(((READ3res *) xdr_location)->res_u.ok); ! data_len = num = avail_len = 0; ! ! wcl = vd->wlist; ! while (wcl != NULL) { ! if (wcl->c_dmemhandle.mrc_rmr != 0 ! && xdr_results == x_READ3res) { ! ! avail_len += wcl->c_len; ! if (wcl->c_len < rok->count) { ! data_len += wcl->c_len; } else { ! /* Can make the rest chunks all 0-len */ ! data_len += rok->count; ! wcl->c_len = rok->count; } ! rok->count -= wcl->c_len; ! num ++; } + else { + cmn_err(CE_NOTE, + "svc_process_wlist: wlist has an error\n"); + } + wcl = wcl->c_next; + } /* ! * MUST fail if there are still more data */ ! if (rok->count > 0) { ! cmn_err(CE_NOTE, ! "svc_process_wlist: data_len is too short \n"); ! return SVC_RDMA_FAIL; ! } ! wcl = vd->wlist; ! rok->count = data_len; ! rok->wlist_len = data_len; ! rok->wlist = wcl; ! *num_wsegment = num; ! 
*templen = avail_len; ! ! /* Register, sync and write over the data */ ! if (data_len > 0) { ! status = clist_register(vd->conn, wcl, TRUE); ! if (status != RDMA_SUCCESS) { ! cmn_err(CE_NOTE, ! "svc_process_wlist: clist_register " "failed"); ! return SVC_RDMA_FAIL; } ! ! status = clist_syncmem(vd->conn, wcl, TRUE); ! if (status != RDMA_SUCCESS) { ! cmn_err(CE_NOTE, ! "svc_process_wlist: syncmem failed(%d)", ! status); ! return SVC_RDMA_FAIL; } + status = RDMA_WRITE(vd->conn, wcl, NOWAIT); + if (status != RDMA_SUCCESS) { + cmn_err(CE_NOTE, + "svc_process_wlist: RDMA_WRITE failed(%d)", + status); + return SVC_RDMA_FAIL; + } + } + + return SVC_RDMA_SUCCESS; + } + + static int + svc_process_long_reply(SVCXPRT * clone_xprt, CONN * conn, + xdrproc_t xdr_results, caddr_t xdr_location, + caddr_t vd, XDR ** xdrs, + struct rpc_msg *msg, bool_t has_args, int *msglen, + int *freelen, int *num, unsigned int *len) + { + rdma_buf_t long_rpc = {0}; + int status; + struct clist *ncl = NULL, *wcl = NULL; + char *memp = NULL; + int avail_len = 0; + int count = 0; + int data_len = 0; + *num = 0; + *freelen = 0; /* ! * If the clone_xprt struct has a reply chunk list, ! * then we MUST RDMA_WRITE the reply back to the client, ! * no matter what its size is. This translates to: ! * ! * RDMA_WRITE + RDMA_SEND(op = RDMA_NOMSG) ! * ! * XXX the rdma write code currently ignores kerberos. */ + (*msglen) += xdrrdma_sizeof(xdr_results, xdr_location, rdma_minchunk); + + wcl = (struct clist *)vd; + count = *msglen; + while (wcl != NULL) { + *freelen += wcl->c_len; + if (wcl->c_dmemhandle.mrc_rmr != 0) + { + avail_len += wcl->c_len; + if (wcl->c_len < count) { + data_len += wcl->c_len; + } else { + data_len += count; + wcl->c_len = count; + } + count -= wcl->c_len; + *num += 1; + } + else { + cmn_err(CE_NOTE, + "svc_process_long_reply: wchunk list has an error\n"); + } + wcl = wcl->c_next; + } + /* ! * MUST fail if there are still more data */ ! if (count > 0) { ! cmn_err(CE_NOTE, ! "svc_process_long_reply: data_len is too short \n"); ! return SVC_RDMA_FAIL; ! } /* ! * Setup buffers for long rpc reply */ + + /* + * We specify 0 for the chunk size since we + * don't want a chunk list. + */ + wcl = (struct clist *)vd; + xdrrdma_create(*xdrs, (caddr_t)wcl->c_saddr , *msglen, 0, + wcl, XDR_ENCODE, NULL); + msg->rm_xid = clone_xprt->xp_xid; ! if (!(xdr_replymsg(*xdrs, msg) && ! (!has_args || SVCAUTH_WRAP(&clone_xprt->xp_auth, *xdrs, xdr_results, xdr_location)))) { ! kmem_free((void *)wcl->c_saddr, *freelen); ! cmn_err(CE_WARN, "svc_process_long_reply: " ! "xdr_replymsg/SVCAUTH_WRAP failed " ! "for long reply\n"); ! return SVC_RDMA_FAIL; } ! *len = XDR_GETPOS(*xdrs); ! ! if (clist_register(conn, wcl, TRUE) != RDMA_SUCCESS) { ! #ifdef SERVER_REG_CACHE ! RDMA_FREE_SERVER_CACHE_BUF(conn, (rib_lrc_entry_t *)wcl->long_reply_buf); ! #else ! kmem_free((void *)(wcl->c_saddr), *freelen); ! #endif ! cmn_err(CE_NOTE, "svc_process_long_reply: RDMA_WRITE: " ! "clist reg failed"); ! return SVC_RDMA_FAIL; } + status = clist_syncmem(conn, wcl, TRUE); + if (status) { + (void) clist_deregister(conn, wcl, TRUE); + #ifdef SERVER_REG_CACHE + RDMA_FREE_SERVER_CACHE_BUF(conn, (rib_lrc_entry_t *)wcl->long_reply_buf); + #else + kmem_free((void *)(wcl->c_saddr), *freelen); + #endif + cmn_err(CE_NOTE, + "svc_process_long_reply: sync mem failed %d", status); + return SVC_RDMA_FAIL; + } + /* ! * Note: we must pass WAIT into the rdma write call to ! * ensure that the call completes before we move on, where ! 
* part of 'moving on' is deregistering the memory -- and ! * if the memory is deregistered before the write completes ! * we'll have an error. */ ! status = RDMA_WRITE(conn, wcl, NOWAIT); ! if (status != RDMA_SUCCESS) { ! (void) clist_deregister(conn, wcl, TRUE); ! #ifdef SERVER_REG_CACHE ! RDMA_FREE_SERVER_CACHE_BUF(conn, (rib_lrc_entry_t *)wcl->long_reply_buf); ! #else ! kmem_free((void *)(wcl->c_saddr), *freelen); ! #endif ! cmn_err(CE_NOTE, ! "svc_process_long_reply: RDMA_WRITE failed %d", ! status); ! return SVC_RDMA_FAIL; } ! return SVC_RDMA_SUCCESS; ! } + static int + svc_compose_rpcmsg(SVCXPRT * clone_xprt, CONN * conn, xdrproc_t xdr_results, + caddr_t xdr_location, rdma_buf_t * rpcreply, XDR ** xdrs, + struct rpc_msg *msg, bool_t has_args, int *msglen, + unsigned int *len) + { + int auth_flavor = msg->rm_reply.rp_acpt.ar_verf.oa_flavor; + + if (has_args && auth_flavor != RPCSEC_GSS) + (*msglen) += xdrrdma_sizeof(xdr_results, xdr_location, + rdma_minchunk); + else if (has_args && auth_flavor == RPCSEC_GSS) { + (*msglen) += + 2 * MAX_AUTH_BYTES + 2 * sizeof(struct opaque_auth); + (*msglen) += xdr_sizeof(xdr_results, xdr_location); + } + + if (*msglen > RPC_MSG_SZ) { + cmn_err(CE_NOTE, + "svc_compose_rpcmsg: Server needs to send a reply" + "larger than RPC_MSG_SZ\n"); + return SVC_RDMA_FAIL; + } + /* * Get a pre-allocated buffer for rpc reply */ ! rpcreply->type = SEND_BUFFER; ! if (RDMA_BUF_ALLOC(conn, rpcreply)) { ! cmn_err(CE_WARN, "svc_compose_rpcmsg: no free buffers!"); ! return SVC_RDMA_FAIL; } ! ! if (has_args == FALSE || auth_flavor != RPCSEC_GSS) { ! xdrrdma_create(*xdrs, rpcreply->addr, rpcreply->len, ! rdma_minchunk, NULL, XDR_ENCODE, NULL); ! } ! else { ! xdrrdma_create(*xdrs, rpcreply->addr, *msglen, 0, NULL, XDR_ENCODE, NULL); } + + msg->rm_xid = clone_xprt->xp_xid; + + if (has_args) { + if (!(xdr_replymsg(*xdrs, msg) && + (!has_args + || SVCAUTH_WRAP(&clone_xprt->xp_auth, *xdrs, + xdr_results, xdr_location)))) { + if (auth_flavor == RPCSEC_GSS + && rpcreply->addr != (*xdrs)->x_base) { + rpcreply->addr = (*xdrs)->x_base; + rpcreply->len = xdr_getbufsize(*xdrs); } + rdma_buf_free(conn, rpcreply); + cmn_err(CE_WARN, + "svc_compose_rpcmsg: xdr_replymsg/SVCAUTH_WRAP " + "failed"); + return SVC_RDMA_FAIL; + } + if (auth_flavor == RPCSEC_GSS + && rpcreply->addr != (*xdrs)->x_base) { + rpcreply->addr = (*xdrs)->x_base; + rpcreply->len = xdr_getbufsize(*xdrs); + } + } + else { + if (!xdr_replymsg(*xdrs, msg)) { + rdma_buf_free(conn, rpcreply); + cmn_err(CE_WARN, + "svc_compose_rpcmsg: xdr_replymsg/SVCAUTH_WRAP " + "failed"); + return SVC_RDMA_FAIL; + } + } ! *len = XDR_GETPOS(*xdrs); ! if (auth_flavor == RPCSEC_GSS) { ! XDR_DESTROY(*xdrs); ! xdrrdma_create(*xdrs, rpcreply->addr, *len, 0, NULL, ! XDR_ENCODE, NULL); ! } ! return SVC_RDMA_SUCCESS; ! } ! ! /* ! * Send rpc reply. */ ! static bool_t ! svc_rdma_ksend(SVCXPRT * clone_xprt, struct rpc_msg *msg) ! { ! XDR *xdrs = &(clone_xprt->xp_xdrout); ! XDR rxdrs; ! CONN *conn = NULL; ! rdma_buf_t clmsg = {0}, rpcreply = {0}; ! ! struct clone_rdma_data *vd; ! struct clist *cl = NULL; ! struct clist *sendlist = NULL; ! struct clist *wcl = NULL; ! struct clist *reply_cl; ! xdrproc_t xdr_results; ! caddr_t xdr_location; ! ! int retval = FALSE; ! int status, msglen, num_wsegment = 0, num_wreply_segments = 0; ! uint32_t rdma_credit = 0, templen = 0; ! int freelen =0; ! bool_t has_args; ! uint_t len, op, vers; ! ! vd = (struct clone_rdma_data *) clone_xprt->xp_p2buf; ! conn = vd->conn; ! /* ! 
* If there is a result procedure specified in the reply message, ! * it will be processed in the xdr_replymsg and SVCAUTH_WRAP. ! * We need to make sure it won't be processed twice, so we null ! * it for xdr_replymsg here. */ ! has_args = FALSE; ! if (msg->rm_reply.rp_stat == MSG_ACCEPTED && ! msg->rm_reply.rp_acpt.ar_stat == SUCCESS) { ! if ((xdr_results = msg->acpted_rply.ar_results.proc) != NULL) { ! has_args = TRUE; ! xdr_location = msg->acpted_rply.ar_results.where; ! msg->acpted_rply.ar_results.proc = xdr_void; ! msg->acpted_rply.ar_results.where = NULL; } } /* ! * Use RDMA Write to return content requested by wlist. ! * Only 1 writechunk in writelist for now, ! * but this chunk can contain multiple rdma segments. */ ! if (vd->wlist) { ! status = svc_process_wlist(vd, xdr_results, xdr_location, ! &num_wsegment, &templen); ! if (status != SVC_RDMA_SUCCESS) { goto out; } } + #ifdef RPC_RDMA_INLINE + else if (xdr_results == x_READ3res) { + READ3resok *rok; + rok = &(((READ3res *) xdr_location)->res_u.ok); + rok->wlist = NULL; + } + #endif /* ! * Get the size of the rpc reply message. */ ! msglen = xdr_sizeof(xdr_replymsg, msg); ! ! /*reply_cl.c_daddr = NULL;*/ ! reply_cl = vd->reply_cl; ! ! if (vd->reply_cl) { ! ! status = svc_process_long_reply(clone_xprt, ! conn, xdr_results, ! xdr_location, (caddr_t)vd->reply_cl, ! &xdrs, msg, has_args, ! &msglen, &freelen, &num_wreply_segments, &len); ! if (status == SVC_RDMA_SUCCESS) { ! op = RDMA_NOMSG; ! cl = NULL; ! goto rdma_writed_long_reply_out; ! } ! else goto out; } + status = svc_compose_rpcmsg(clone_xprt, conn, xdr_results, + xdr_location, &rpcreply, &xdrs, msg, + has_args, &msglen, &len); + if (status != SVC_RDMA_SUCCESS) + goto out; ! op = RDMA_MSG; ! ! cl = xdrrdma_clist(xdrs); ! cl = NULL; if (cl != NULL) { ! cmn_err(CE_NOTE, ! "svc_rdma_ksend: Should not provide non-null" ! "read chunk list to client\n"); ! } ! ! rdma_writed_long_reply_out: ! ! clmsg.type = SEND_BUFFER; ! if (RDMA_BUF_ALLOC(conn, &clmsg)) { ! rdma_buf_free(conn, &rpcreply); ! cmn_err(CE_WARN, "svc_rdma_ksend: no free buffers!!"); goto out; } ! #ifdef DYNAMIC_CREDIT_CONTROL ! svc_grant_credit(conn, &rdma_credit); ! #else ! rdma_credit = rdma_bufs_granted; ! #endif ! vers = RPCRDMA_VERS; xdrs = &rxdrs; xdrmem_create(xdrs, clmsg.addr, clmsg.len, XDR_ENCODE); ! (*(uint32_t *) clmsg.addr) = msg->rm_xid; /* Skip xid and set the xdr position accordingly. */ ! XDR_SETPOS(xdrs, sizeof(uint32_t)); ! if (!xdr_u_int(xdrs, &vers) || ! !xdr_u_int(xdrs, &rdma_credit) || !xdr_u_int(xdrs, &op)) { ! rdma_buf_free(conn, &rpcreply); ! rdma_buf_free(conn, &clmsg); cmn_err(CE_WARN, "svc_rdma_ksend: xdr_u_int failed"); goto out; } /* ! * Now XDR the read chunk list, actually always NULL */ (void) xdr_do_clist(xdrs, &cl); /* ! * encode write list -- we already drove RDMA_WRITEs */ ! wcl = vd->wlist; ! if (!xdr_encode_wlist(xdrs, wcl, num_wsegment)) { ! cmn_err(CE_NOTE, ! "svc_rdma_ksend: xdr_encode_wlist failed: " ! "wcl=%p", (void *) wcl); ! rdma_buf_free(conn, &rpcreply); ! rdma_buf_free(conn, &clmsg); goto out; } /* ! * XDR encode the RDMA_REPLY write chunk */ ! (void) xdr_encode_reply_wchunk(xdrs, vd->reply_cl, num_wreply_segments); ! ! clist_add(&sendlist, 0, XDR_GETPOS(xdrs), &clmsg.handle, clmsg.addr, ! NULL, NULL); ! ! if (op == RDMA_MSG) { ! clist_add(&sendlist, 0, len, &rpcreply.handle, ! rpcreply.addr, NULL, NULL); } ! ! #if defined(ASYNC_SERVER_DEREG) ! status = RDMA_SEND_NW(conn, sendlist, msg->rm_xid, (caddr_t)conn, ! (caddr_t)vd->wlist, ! templen, ! (caddr_t)reply_cl, ! 
freelen, num_wsegment, num_wreply_segments ); ! #else ! status = RDMA_SEND(conn, sendlist, msg->rm_xid); #endif if (status != RDMA_SUCCESS) { goto out; } retval = TRUE; + out: /* * Free up sendlist chunks */ if (sendlist != NULL)
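Taken together, the reworked svc_rdma_krecv()/svc_rdma_ksend() above choose the reply path from what the client advertised: a write list means the NFS3 READ payload is RDMA_WRITTEN into the client's chunks before the reply header goes out, a reply chunk means the whole encoded reply is RDMA_WRITTEN and the SEND carries only an RDMA_NOMSG header, and otherwise the reply is encoded inline into a pre-registered SEND buffer (svc_compose_rpcmsg() now fails outright if such an inline reply would exceed RPC_MSG_SZ). The user-space sketch below only mirrors that branching; the reply_ctx type and choose_reply_path() are illustrative stand-ins, not code from the patch.

/*
 * Hedged outline of the reply-path decision in svc_rdma_ksend().
 */
#include <stdio.h>
#include <stddef.h>

typedef enum { RDMA_MSG_OP, RDMA_NOMSG_OP } rdma_op_t;

struct reply_ctx {
	void	*wlist;		/* client-provided write list (READ payload) */
	void	*reply_cl;	/* client-provided reply chunk (long replies) */
};

static rdma_op_t
choose_reply_path(const struct reply_ctx *ctx)
{
	/*
	 * A write list only carries the READ payload: the data is
	 * RDMA_WRITTEN first, and the reply header still goes out in
	 * the SEND, so it does not change the header op by itself.
	 */
	if (ctx->wlist != NULL)
		printf("RDMA_WRITE READ data into client's write list\n");

	if (ctx->reply_cl != NULL) {
		/*
		 * The whole encoded reply is RDMA_WRITTEN into the
		 * client's reply chunk; the SEND carries only the
		 * RDMA_NOMSG transport header.
		 */
		return (RDMA_NOMSG_OP);
	}

	/* Small reply: encode it inline into a registered SEND buffer. */
	return (RDMA_MSG_OP);
}

int
main(void)
{
	struct reply_ctx inline_reply = { NULL, NULL };
	struct reply_ctx long_reply = { NULL, (void *)&long_reply };

	printf("inline reply -> %s\n",
	    choose_reply_path(&inline_reply) == RDMA_NOMSG_OP ?
	    "RDMA_NOMSG" : "RDMA_MSG");
	printf("reply chunk  -> %s\n",
	    choose_reply_path(&long_reply) == RDMA_NOMSG_OP ?
	    "RDMA_NOMSG" : "RDMA_MSG");
	return (0);
}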
*** 958,980 **** clist_free(sendlist); /* * Destroy private data for xdr rdma */ XDR_DESTROY(&(clone_xprt->xp_xdrout)); /* * This is completely disgusting. If public is set it is * a pointer to a structure whose first field is the address * of the function to free that structure and any related * stuff. (see rrokfree in nfs_xdr.c). */ if (xdrs->x_public) { /* LINTED pointer alignment */ ! (**((int (**)())xdrs->x_public))(xdrs->x_public); } return (retval); } /* * Deserialize arguments. --- 1201,1267 ---- clist_free(sendlist); /* * Destroy private data for xdr rdma */ + if ((clone_xprt->xp_xdrout).x_private) XDR_DESTROY(&(clone_xprt->xp_xdrout)); + if (rxdrs.x_private) + XDR_DESTROY(&rxdrs); /* * This is completely disgusting. If public is set it is * a pointer to a structure whose first field is the address * of the function to free that structure and any related * stuff. (see rrokfree in nfs_xdr.c). */ if (xdrs->x_public) { /* LINTED pointer alignment */ ! (**((int (**)()) xdrs->x_public)) (xdrs->x_public); } + if (vd->wlist != NULL) { + #if defined(ASYNC_SERVER_DEREG) + if(!retval) { + #endif + wcl = vd->wlist; + (void) clist_deregister(vd->conn, wcl, TRUE); + #ifdef SERVER_REG_CACHE + RDMA_FREE_SERVER_CACHE_BUF(vd->conn, (rib_lrc_entry_t *)wcl->long_reply_buf); + #else + if(templen) + kmem_free((void *) (vd->wlist)->c_saddr, templen); + #endif + kmem_free(vd->wlist, num_wsegment * sizeof(struct clist)); + #if defined(ASYNC_SERVER_DEREG) + } + #endif + vd->wlist = NULL; + } + + + if(vd->reply_cl != NULL){ + #if defined(ASYNC_SERVER_DEREG) + if(!retval) { + #endif + (void) clist_deregister(conn, reply_cl, TRUE); + #ifdef SERVER_REG_CACHE + RDMA_FREE_SERVER_CACHE_BUF(conn, (rib_lrc_entry_t *)reply_cl->long_reply_buf); + #else + #ifdef DEBUG + if(rdma_svc_debug > 1) + cmn_err(CE_NOTE, "Freeing up %p of length %d\n",reply_cl->c_saddr,freelen); + #endif + if(freelen) + kmem_free((void *)(reply_cl->c_saddr), freelen); + #endif + kmem_free((void *)vd->reply_cl, num_wreply_segments * sizeof(struct clist)); + #if defined(ASYNC_SERVER_DEREG) + } + #endif + vd->reply_cl = NULL; + } return (retval); } /* * Deserialize arguments.
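One detail in the cleanup path above: the new guards test (clone_xprt->xp_xdrout).x_private and rxdrs.x_private before calling XDR_DESTROY(), but rxdrs is a stack XDR that only goes through xdrmem_create() on the paths that reach the header-encoding step, so on the earlier `goto out` paths the test reads an uninitialized field. The stand-alone sketch below (fake_xdr is an invented stand-in type, not the real XDR) simply illustrates zeroing the stack stream up front so that such a guard is well defined.

/*
 * Hedged illustration of making a "destroy only if created" guard safe.
 */
#include <stdio.h>
#include <string.h>

struct fake_xdr {
	void	*x_private;	/* stands in for XDR.x_private */
};

static void
fake_xdr_destroy(struct fake_xdr *x)
{
	printf("destroying stream %p\n", x->x_private);
	x->x_private = NULL;
}

int
main(void)
{
	struct fake_xdr rxdrs;

	memset(&rxdrs, 0, sizeof (rxdrs));	/* make the guard well-defined */

	/* ... encoding may or may not create the stream ... */

	if (rxdrs.x_private != NULL)		/* tear down only if created */
		fake_xdr_destroy(&rxdrs);
	return (0);
}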
*** 1235,1239 ****
--- 1522,1561 ----
  		}
  		drtprev = drt;
  		drt = drt->dr_chain;
  	}
  }
+ 
+ bool_t
+ rdma_get_wchunk_seg(struct svc_req *req, iovec_t *iov)
+ {
+ 	struct clone_rdma_data *rcd;
+ 	struct clist *clist;
+ 	uint32_t tlen;
+ 
+ 	if (req->rq_xprt->xp_type != T_RDMA) {
+ 		return (FALSE);
+ 	}
+ 
+ 	rcd = (struct clone_rdma_data *)(&req->rq_xprt->xp_p2buf);
+ 	if (rcd->wlist == NULL) {
+ 		return (FALSE);
+ 	}
+ 	tlen = 0;
+ 	clist = rcd->wlist;
+ 	while (clist) {
+ 		tlen += clist->c_len;
+ 		clist = clist->c_next;
+ 	}
+ 
+ 	/*
+ 	 * set iov to addr+len of first segment of first wchunk of
+ 	 * wlist sent by client. krecv() already malloc'd a buffer
+ 	 * large enough, but registration is deferred until we write
+ 	 * the buffer back to (NFS) client using RDMA_WRITE.
+ 	 */
+ 	iov->iov_base = (caddr_t)rcd->wlist->c_saddr;
+ 	iov->iov_len = tlen;
+ 
+ 	return (TRUE);
+ }
+ 
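rdma_get_wchunk_seg() above is what the new slot in rdma_svc_ops points at: it hands the NFS READ path an iovec covering the buffer svc_rdma_krecv() already set aside for the client's write chunk, so that file data can presumably be placed there directly and RDMA_WRITTEN back without an extra copy. The sketch below shows how a caller might consume it; it is user-space only, the stub body and the has_wlist field are invented for the example, and the real READ path presumably goes through the new svc_ops entry rather than calling the function directly.

/*
 * Hedged usage sketch for rdma_get_wchunk_seg(); everything here is a
 * stand-in for the kernel types and the real NFS READ caller.
 */
#include <stdio.h>
#include <sys/uio.h>

typedef int bool_t;
#define	TRUE	1
#define	FALSE	0

struct svc_req { int has_wlist; };	/* stand-in for the kernel struct */

/* Stub with the same shape as the new rdma_get_wchunk_seg() above. */
static bool_t
rdma_get_wchunk_seg(struct svc_req *req, struct iovec *iov)
{
	static char wchunk_buf[8192];	/* pretend krecv() allocated this */

	if (!req->has_wlist)
		return (FALSE);
	iov->iov_base = wchunk_buf;
	iov->iov_len = sizeof (wchunk_buf);
	return (TRUE);
}

int
main(void)
{
	struct svc_req req = { 1 };
	struct iovec iov;
	char fallback[1024];

	/* Prefer the client's write chunk; fall back to an inline buffer. */
	if (!rdma_get_wchunk_seg(&req, &iov)) {
		iov.iov_base = fallback;
		iov.iov_len = sizeof (fallback);
	}
	printf("READ payload buffer: %zu bytes\n", iov.iov_len);
	return (0);
}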