1 /*
  2  * CDDL HEADER START
  3  *
  4  * The contents of this file are subject to the terms of the
  5  * Common Development and Distribution License, Version 1.0 only
  6  * (the "License").  You may not use this file except in compliance
  7  * with the License.
  8  *
  9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 10  * or http://www.opensolaris.org/os/licensing.
 11  * See the License for the specific language governing permissions
 12  * and limitations under the License.
 13  *
 14  * When distributing Covered Code, include this CDDL HEADER in each
 15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 16  * If applicable, add the following below this CDDL HEADER, with the
 17  * fields enclosed by brackets "[]" replaced with your own identifying
 18  * information: Portions Copyright [yyyy] [name of copyright owner]
 19  *
 20  * CDDL HEADER END
 21  */
 22 /*
 23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 24  * Use is subject to license terms.
 25  */
 26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
 27 /* All Rights Reserved */
 28 /*
 29  * Portions of this source code were derived from Berkeley
 30  * 4.3 BSD under license from the Regents of the University of
 31  * California.
 32  */
 33 
 34  /* Copyright (c) 2006, The Ohio State University. All rights reserved.
 35   *
 36   * Portions of this source code is developed by the team members of
 37   * The Ohio State University's Network-Based Computing Laboratory (NBCL),
 38   * headed by Professor Dhabaleswar K. (DK) Panda.
 39   *
  * Acknowledgements to contributions from developers:
 41   *   Ranjit Noronha: noronha@cse.ohio-state.edu
 42   *   Lei Chai      : chail@cse.ohio-state.edu
 43   *   Weikuan Yu    : yuw@cse.ohio-state.edu
 44   *
 45   */
 46 
 47 #pragma ident   "@(#)clnt_rdma.c        1.10    05/07/26 SMI"
 48 
 49 #include <sys/param.h>
 50 #include <sys/types.h>
 51 #include <sys/user.h>
 52 #include <sys/systm.h>
 53 #include <sys/sysmacros.h>
 54 #include <sys/errno.h>
 55 #include <sys/kmem.h>
 56 #include <sys/debug.h>
 57 #include <sys/systm.h>
 58 #include <sys/kstat.h>
 59 #include <sys/t_lock.h>
 60 #include <sys/ddi.h>
 61 #include <sys/cmn_err.h>
 62 #include <sys/time.h>
 63 #include <sys/isa_defs.h>
 64 #include <sys/zone.h>
 65 
 66 #include <rpc/types.h>
 67 #include <rpc/xdr.h>
 68 #include <rpc/auth.h>
 69 #include <rpc/clnt.h>
 70 #include <rpc/rpc_msg.h>
 71 #include <rpc/rpc_rdma.h>
 72 #include <nfs/nfs.h>
 73 
 74 #define CLNT_CREDIT_LOW (5)
 75 
 76 xdrproc_t x_READ3args = NULL_xdrproc_t;
 77 xdrproc_t x_READ3res = NULL_xdrproc_t;
 78 xdrproc_t x_READ3vres = NULL_xdrproc_t;
 79 xdrproc_t x_READ3uiores = NULL_xdrproc_t;
 80 
 81 static uint32_t rdma_bufs_rqst = RDMA_BUFS_RQST;
 82 
 83 int rdma_wlist_verbose_debug = 0;
 84 int rdma_wlist_memreg_debug = 0;
 85 int rdma_wlist_clnt_debug = 0;
 86 int rdma_wlist_svc_debug = 0;
 87 int rdma_wlist_xdr_debug = 0;
 88 int rdma_wlist_pglck_debug = 0;
 89 int credit_control_debug = 0;
 90 int rdma_long_reply_debug = 0;
 91 int rdma_xdr_long_reply_debug = 0;
 92 
 93 struct clist empty_cl = {0};
 94 
 95 static void clnt_read3args_make_wlist(caddr_t, struct clist **, xdrproc_t, uint_t *);
 96 static int clnt_compose_rpcmsg(CLIENT *, rpcproc_t, rdma_buf_t *,
 97                                       XDR *, xdrproc_t, caddr_t);
 98 static int  clnt_compose_rdma_header(CONN *, CLIENT *, rdma_buf_t *,
 99                                      XDR **, uint_t *);
100 static int clnt_setup_rlist(CONN *, XDR *, struct clist **);
101 static int clnt_setup_wlist(CONN *, rpcproc_t, struct clist **,
102                                    caddr_t, xdrproc_t, XDR *);
103 static int clnt_setup_long_reply(CONN *, rpcproc_t, struct clist *, 
104                 XDR *, bool_t *);
105 #ifdef DYNAMIC_CREDIT_CONTROL
106 static void clnt_compute_credit(CONN *, uint32_t *);
107 #endif
108 static void clnt_check_credit(CONN *);
109 static void clnt_return_credit(CONN *);
110 static int clnt_decode_long_reply(CONN *, rpcproc_t, struct clist *,
111                 struct clist *, XDR *, XDR **, struct clist *, 
112                 struct clist *, uint_t,uint_t);
113 
114 static void clnt_update_credit(CONN *,uint32_t);
115 static void check_dereg_wlist(CONN *, struct clist *);
116 
117 static enum clnt_stat clnt_rdma_kcallit(CLIENT *, rpcproc_t, xdrproc_t,
118     caddr_t, xdrproc_t, caddr_t, struct timeval);
119 static void     clnt_rdma_kabort(CLIENT *);
120 static void     clnt_rdma_kerror(CLIENT *, struct rpc_err *);
121 static bool_t   clnt_rdma_kfreeres(CLIENT *, xdrproc_t, caddr_t);
122 static void     clnt_rdma_kdestroy(CLIENT *);
123 static bool_t   clnt_rdma_kcontrol(CLIENT *, int, char *);
124 static int      clnt_rdma_ksettimers(CLIENT *, struct rpc_timers *,
125     struct rpc_timers *, int, void(*)(int, int, caddr_t), caddr_t, uint32_t);
126 
127 /*
128  * Operations vector for RDMA based RPC
129  */
130 static struct clnt_ops rdma_clnt_ops = {
131         clnt_rdma_kcallit,      /* do rpc call */
132         clnt_rdma_kabort,       /* abort call */
133         clnt_rdma_kerror,       /* return error status */
134         clnt_rdma_kfreeres,     /* free results */
135         clnt_rdma_kdestroy,     /* destroy rpc handle */
136         clnt_rdma_kcontrol,     /* the ioctl() of rpc */
137         clnt_rdma_ksettimers,   /* set retry timers */
138 };
139 
/*
 * Size, in bytes, of the pre-serialized RPC call header that
 * clnt_rdma_kcreate() encodes once into cku_rpchdr.
 */
#define CKU_HDRSIZE     20

/*
 * Status codes returned by the static clnt_* helpers in this file.
 * The negative value is parenthesized so the macro expands safely
 * inside any surrounding expression.
 */
#define CLNT_RDMA_SUCCESS (0)
#define CLNT_RDMA_FAIL    (-99)
146 
147 /*
148  * Per RPC RDMA endpoint details
149  */
150 typedef struct cku_private {
151         CLIENT                  cku_client;     /* client handle */
152         rdma_mod_t              *cku_rd_mod;    /* underlying RDMA mod */
153         void                    *cku_rd_handle; /* underlying RDMA device */
154         struct netbuf           cku_addr;       /* remote netbuf address */
155         int                     cku_addrfmly;   /* for finding addr_type */
156         struct rpc_err          cku_err;        /* error status */
157         struct cred             *cku_cred;      /* credentials */
158         XDR                     cku_outxdr;     /* xdr stream for output */
159         uint32_t                cku_outsz;
160         XDR                     cku_inxdr;      /* xdr stream for input */
161         char                    cku_rpchdr[CKU_HDRSIZE+4]; /* rpc header */
162         uint32_t                cku_xid;        /* current XID */
163 } cku_private_t;
164 
/*
 * Seconds to wait before returning to the caller after a connection
 * attempt fails; kept in a variable so clnt_rdma_kcallit() reads the
 * current value each time.
 */
#define CLNT_RDMA_DELAY 10
static int clnt_rdma_min_delay = CLNT_RDMA_DELAY;
167 
168 struct {
169         kstat_named_t   rccalls;
170         kstat_named_t   rcbadcalls;
171         kstat_named_t   rcbadxids;
172         kstat_named_t   rctimeouts;
173         kstat_named_t   rcnewcreds;
174         kstat_named_t   rcbadverfs;
175         kstat_named_t   rctimers;
176         kstat_named_t   rccantconn;
177         kstat_named_t   rcnomem;
178         kstat_named_t   rcintrs;
179         kstat_named_t   rclongrpcs;
180 } rdmarcstat = {
181         { "calls",      KSTAT_DATA_UINT64 },
182         { "badcalls",   KSTAT_DATA_UINT64 },
183         { "badxids",    KSTAT_DATA_UINT64 },
184         { "timeouts",   KSTAT_DATA_UINT64 },
185         { "newcreds",   KSTAT_DATA_UINT64 },
186         { "badverfs",   KSTAT_DATA_UINT64 },
187         { "timers",     KSTAT_DATA_UINT64 },
188         { "cantconn",   KSTAT_DATA_UINT64 },
189         { "nomem",      KSTAT_DATA_UINT64 },
190         { "interrupts", KSTAT_DATA_UINT64 },
191         { "longrpc",    KSTAT_DATA_UINT64 }
192 };
193 
194 kstat_named_t *rdmarcstat_ptr = (kstat_named_t *)&rdmarcstat;
195 uint_t rdmarcstat_ndata = sizeof (rdmarcstat) / sizeof (kstat_named_t);
196 
#ifdef DEBUG
int rdma_clnt_debug = 0;
#endif

/*
 * RCSTAT_INCR(x): bump the 64-bit counter rdmarcstat.x by one.
 *
 * When accurate_stats is defined the update is serialized with
 * rdmarcstat_lock; otherwise it is a plain, unlocked increment.
 *
 * Both forms are wrapped in do { } while (0) and carry no trailing
 * semicolon, so "RCSTAT_INCR(x);" behaves as exactly one statement and
 * is safe as the body of an unbraced if/else.
 */
#ifdef accurate_stats
extern kmutex_t rdmarcstat_lock;    /* mutex for rcstat updates */

#define RCSTAT_INCR(x)                          \
        do {                                    \
                mutex_enter(&rdmarcstat_lock);  \
                rdmarcstat.x.value.ui64++;      \
                mutex_exit(&rdmarcstat_lock);   \
        } while (0)
#else
#define RCSTAT_INCR(x)                          \
        do {                                    \
                rdmarcstat.x.value.ui64++;      \
        } while (0)
#endif
212 
/*
 * Conversions between the private state and the public handle:
 *   ptoh(p) - address of the CLIENT embedded in private state p
 *   htop(h) - private state recorded in h->cl_private
 */
#define ptoh(p)         (&((p)->cku_client))
#define htop(h)         ((cku_private_t *)((h)->cl_private))
215 
216 int
217 clnt_rdma_kcreate(char *proto, void *handle, struct netbuf *raddr, int family,
218     rpcprog_t pgm, rpcvers_t vers, struct cred *cred, CLIENT **cl)
219 {
220         CLIENT *h;
221         struct cku_private *p;
222         struct rpc_msg call_msg;
223         rdma_registry_t *rp;
224 
225         ASSERT(INGLOBALZONE(curproc));
226 
227         if (cl == NULL)
228                 return (EINVAL);
229         *cl = NULL;
230 
231         p = kmem_zalloc(sizeof (*p), KM_SLEEP);
232 
233         /*
234          * Find underlying RDMATF plugin
235          */
236         rw_enter(&rdma_lock, RW_READER);
237         rp = rdma_mod_head;
238         while (rp != NULL) {
239                 if (strcmp(rp->r_mod->rdma_api, proto))
240                         rp = rp->r_next;
241                 else {
242                         p->cku_rd_mod = rp->r_mod;
243                         p->cku_rd_handle = handle;
244                         break;
245                 }
246         }
247         rw_exit(&rdma_lock);
248 
249         if (p->cku_rd_mod == NULL) {
250                 /*
251                  * Should not happen.
252                  * No matching RDMATF plugin.
253                  */
254                 kmem_free(p, sizeof (struct cku_private));
255                 return (EINVAL);
256         }
257 
258         h = ptoh(p);
259         h->cl_ops = &rdma_clnt_ops;
260         h->cl_private = (caddr_t)p;
261         h->cl_auth = authkern_create();
262 
263         /* call message, just used to pre-serialize below */
264         call_msg.rm_xid = 0;
265         call_msg.rm_direction = CALL;
266         call_msg.rm_call.cb_rpcvers = RPC_MSG_VERSION;
267         call_msg.rm_call.cb_prog = pgm;
268         call_msg.rm_call.cb_vers = vers;
269 
270         xdrmem_create(&p->cku_outxdr, p->cku_rpchdr, CKU_HDRSIZE, XDR_ENCODE);
271         /* pre-serialize call message header */
272         if (!xdr_callhdr(&p->cku_outxdr, &call_msg)) {
273                 XDR_DESTROY(&p->cku_outxdr);
274                 auth_destroy(h->cl_auth);
275                 kmem_free(p, sizeof (struct cku_private));
276                 return (EINVAL);
277         }
278 
279         /*
280          * Set up the rpc information
281          */
282         p->cku_cred = cred;
283         p->cku_addr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP);
284         p->cku_addr.maxlen = raddr->maxlen;
285         p->cku_addr.len = raddr->len;
286         bcopy(raddr->buf, p->cku_addr.buf, raddr->len);
287         p->cku_addrfmly = family;
288 
289         *cl = h;
290         return (0);
291 }
292 
293 static void
294 clnt_rdma_kdestroy(CLIENT *h)
295 {
296         struct cku_private *p = htop(h);
297 
298         kmem_free(p->cku_addr.buf, p->cku_addr.maxlen);
299         kmem_free(p, sizeof (*p));
300 } 
301 
302 void
303 clnt_rdma_kinit(CLIENT *h, char *proto, void *handle, struct netbuf *raddr,
304     struct cred *cred)
305 {
306         struct cku_private *p = htop(h);
307         rdma_registry_t *rp;
308 
309         ASSERT(INGLOBALZONE(curproc));
310         /*
311          * Find underlying RDMATF plugin
312          */
313         p->cku_rd_mod = NULL;
314         rw_enter(&rdma_lock, RW_READER);
315         rp = rdma_mod_head;
316         while (rp != NULL) {
317                 if (strcmp(rp->r_mod->rdma_api, proto))
318                         rp = rp->r_next;
319                 else {
320                         p->cku_rd_mod = rp->r_mod;
321                         p->cku_rd_handle = handle;
322                         break;
323                 }
324 
325         }
326         rw_exit(&rdma_lock);
327 
328         /*
329          * Set up the rpc information
330          */
331         p->cku_cred = cred;
332         p->cku_xid = 0;
333 
334         if (p->cku_addr.maxlen < raddr->len) {
335                 if (p->cku_addr.maxlen != 0 && p->cku_addr.buf != NULL)
336                         kmem_free(p->cku_addr.buf, p->cku_addr.maxlen);
337                 p->cku_addr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP);
338                 p->cku_addr.maxlen = raddr->maxlen;
339         }
340 
341         p->cku_addr.len = raddr->len;
342         bcopy(raddr->buf, p->cku_addr.buf, raddr->len);
343         h->cl_ops = &rdma_clnt_ops;
344 }
345 
346 static int clnt_compose_rpcmsg(CLIENT *h, rpcproc_t procnum, 
347                                rdma_buf_t *rpcmsg, XDR *xdrs, 
348                                xdrproc_t xdr_args, caddr_t argsp)
349 {
350     cku_private_t *p = htop(h);
351     
352     if (h->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
353             /*
354              * Copy in the preserialized RPC header
355              * information.
356              */
357             bcopy(p->cku_rpchdr, rpcmsg->addr, CKU_HDRSIZE);
358 
359             /*
360              * transaction id is the 1st thing in the output
361              * buffer.
362              */
363             /* LINTED pointer alignment */
364             (*(uint32_t *)(rpcmsg->addr)) = p->cku_xid;
365 
366             /* Skip the preserialized stuff. */
367             XDR_SETPOS(xdrs, CKU_HDRSIZE);
368 
369             /* Serialize dynamic stuff into the output buffer. */
370             if ((!XDR_PUTINT32(xdrs, (int32_t *)&procnum)) ||
371                 (!AUTH_MARSHALL(h->cl_auth, xdrs, p->cku_cred)) ||
372                 (!(*xdr_args)(xdrs, argsp))) {
373                     cmn_err(CE_WARN,"Failed to serialize dynamic arguments\n");
374                     return CLNT_RDMA_FAIL;
375                 }
376             p->cku_outsz = XDR_GETPOS(xdrs);
377     } else {
378             uint32_t *uproc = (uint32_t *)&p->cku_rpchdr[CKU_HDRSIZE];
379             IXDR_PUT_U_INT32(uproc, procnum);
380             (*(uint32_t *)(&p->cku_rpchdr[0])) = p->cku_xid;
381             XDR_SETPOS(xdrs, 0);
382 
383             /* Serialize the procedure number and the arguments. */
384             if (!AUTH_WRAP(h->cl_auth, (caddr_t)p->cku_rpchdr,
385                 CKU_HDRSIZE+4, xdrs, NULL, NULL) || 
386                 !(*xdr_args)(xdrs, argsp)) {
387                    if (rpcmsg->addr != xdrs->x_base) {
388                        rpcmsg->addr = xdrs->x_base;
389                        rpcmsg->len = xdr_getbufsize(xdrs);
390                    }
391                     cmn_err(CE_WARN,"Failed to serialize procedure number and the arguments.\n");
392                    return CLNT_RDMA_FAIL;
393              }
394             /*
395              * If we had to allocate a new buffer while encoding
396              * then update the addr and len.
397              */
398                 if (rpcmsg->addr != xdrs->x_base) {
399                     rpcmsg->addr = xdrs->x_base;
400                     rpcmsg->len = xdr_getbufsize(xdrs);
401                 }
402 
403                 p->cku_outsz = XDR_GETPOS(xdrs);
404         }
405 
406     return CLNT_RDMA_SUCCESS;
407 }
408 
409 static int clnt_compose_rdma_header(CONN *conn, CLIENT *h, rdma_buf_t *clmsg,
410                                     XDR **xdrs, uint_t *op)
411 {
412         cku_private_t *p = htop(h);
413         uint_t vers;
414         uint32_t rdma_credit = rdma_bufs_rqst;
415 
416         vers = RPCRDMA_VERS;
417         clmsg->type = SEND_BUFFER;
418 
419 #ifdef DYNAMIC_CREDIT_CONTROL
420         clnt_compute_credit(conn, &rdma_credit);
421 #endif
422 
423         if (RDMA_BUF_ALLOC(conn, clmsg)) {
424                 return CLNT_RDMA_FAIL;
425         }
426 
427         *xdrs = &p->cku_outxdr;
428         xdrmem_create(*xdrs, clmsg->addr, clmsg->len, XDR_ENCODE);
429 
430         (*(uint32_t *)clmsg->addr) = p->cku_xid;
431         XDR_SETPOS(*xdrs, sizeof (uint32_t));
432         (void) xdr_u_int(*xdrs, &vers);
433         (void) xdr_u_int(*xdrs, &rdma_credit);
434         (void) xdr_u_int(*xdrs, op);
435 
436         return CLNT_RDMA_SUCCESS;
437 }
438 
439 static int clnt_setup_rlist(CONN *conn, XDR *xdrs, struct clist **cl)
440 {
441         int ret;
442 
443         if (*cl != NULL) {
444                 ret = clist_register(conn, *cl, 1);
445                 if (ret != RDMA_SUCCESS) {
446                    return CLNT_RDMA_FAIL;
447                 }
448         }
449         (void) xdr_do_clist(xdrs, cl);
450         
451         return CLNT_RDMA_SUCCESS;
452 }
453 
454 static int clnt_setup_wlist(CONN *conn, rpcproc_t procnum, 
455         struct clist **rpccall_wlist, caddr_t resultsp, 
456         xdrproc_t xdr_results, XDR *xdrs)
457 {
458         int status;
459         uint_t num_segment = 0;
460 
461         if (procnum == NFSPROC3_READ) {
462                 clnt_read3args_make_wlist(resultsp, rpccall_wlist, 
463                                 xdr_results, &num_segment);
464                 status = clist_register(conn, *rpccall_wlist, 0);
465                 if (status != RDMA_SUCCESS) 
466                         return CLNT_RDMA_FAIL;
467         } else {
468                 *rpccall_wlist = NULL;
469         }
470  
471         if (! xdr_encode_wlist(xdrs, *rpccall_wlist, num_segment)) 
472                 return CLNT_RDMA_FAIL;
473 
474         return CLNT_RDMA_SUCCESS;
475 }
476 
477 static int clnt_setup_long_reply(CONN *conn, rpcproc_t procnum, 
478                 struct clist *lrc_clist, 
479                 XDR *xdrs, bool_t *exists)
480 {
481         int status;
482         caddr_t addr;
483 #ifdef SERVER_REG_CACHE
484         rib_lrc_entry_t *long_reply_buf = NULL;
485 #endif
486         *exists = FALSE;
487         lrc_clist->c_daddr = NULL;
488 
489 #ifdef RPC_RDMA_INLINE
490         if (lrc_clist->c_len < rdma_minchunk)
491                 return CLNT_RDMA_SUCCESS;
492 #endif
493 
494         if (procnum == NFSPROC3_READDIR || 
495             procnum == NFSPROC3_READDIRPLUS || 
496             procnum == NFSPROC3_READLINK) {
497 #ifndef SERVER_REG_CACHE
498                 addr = kmem_alloc(LONG_REPLY_LEN, KM_SLEEP);
499                 bzero(addr, LONG_REPLY_LEN);
500                 lrc_clist->c_daddr        = (uint64)addr;
501                 lrc_clist->c_len          = LONG_REPLY_LEN;
502                 lrc_clist->c_next         = NULL;
503                 lrc_clist->long_reply_buf = NULL;
504                 status = clist_register(conn, lrc_clist, 0);
505 #else
506                 long_reply_buf = RDMA_GET_SERVER_CACHE_BUF(conn, LONG_REPLY_LEN);
507                 bzero(long_reply_buf->lrc_buf, LONG_REPLY_LEN);
508                 lrc_clist->c_daddr        = (uint64)long_reply_buf->lrc_buf;
509                 lrc_clist->c_len          = LONG_REPLY_LEN;
510                 lrc_clist->c_next         = NULL;
511                 lrc_clist->long_reply_buf = (uint64)long_reply_buf;
512                 lrc_clist->c_dmemhandle   = long_reply_buf->lrc_mhandle;
513                 status = clist_register(conn, lrc_clist, 0);
514 #endif
515                 if(status) {
516                         cmn_err(CE_WARN, "clnt_setup_long_reply: cannot register buffer");
517 #ifndef SERVER_REG_CACHE
518                         kmem_free((void*)addr, (size_t)LONG_REPLY_LEN);
519 #else
520                  RDMA_FREE_SERVER_CACHE_BUF(conn, (rib_lrc_entry_t *)long_reply_buf);
521 
522 #endif
523                         lrc_clist->c_daddr = NULL;
524                         return CLNT_RDMA_FAIL;
525                 }
526                 *exists = TRUE;
527         } 
528 
529         return CLNT_RDMA_SUCCESS;
530 }
531 
532 static void
533 clnt_read3args_make_wlist(caddr_t replyp, struct clist **rpccall_wlist, 
534                           xdrproc_t xr, uint_t *num_segment)
535 {
536         READ3uiores *ures = (READ3uiores *)replyp;
537         READ3vres   *vres = (READ3vres *)replyp;
538         struct clist *rwl = NULL, *prev = NULL;
539         int i, total_length;
540 
541         *rpccall_wlist = NULL;
542 
543 #ifdef RPC_RDMA_INLINE
544         if (xr == x_READ3uiores) {
545                 total_length = 0;
546                 for(i=0; i<ures->uiop->uio_iovcnt; i++) {
547                         total_length += ures->uiop->uio_iov[i].iov_len;
548                 }
549         } else {
550                 total_length = vres->data.data_len;
551         }
552 
553         if (total_length < rdma_minchunk)
554                 return;
555 #endif
556 
557         /* XXX: fake a chunk threshold for the combined length for now */
558         if (xr == x_READ3uiores) {
559                 *num_segment = ures->uiop->uio_iovcnt;
560                 for(i=0; i<ures->uiop->uio_iovcnt; i++) {
561                         rwl = (struct clist *)kmem_zalloc(sizeof(struct clist), 
562                                         KM_SLEEP);
563 
564                         rwl->c_len = ures->uiop->uio_iov[i].iov_len;
565                         rwl->c_daddr = (uint64)(ures->uiop->uio_iov[i].iov_base);
566                         /*
567                          * if userspace address, put adspace ptr in clist.  
568                          * If not, then do nothing since it's already 
569                          * set to NULL (from empty_cl)
570                          */
571                         if (ures->uiop->uio_segflg == UIO_USERSPACE) {
572                                 int error;
573                                 rwl->c_adspc = ttoproc(curthread)->p_as;
574                         } else {
575                                 rwl->c_dpplist = (page_t **)NULL;
576                         }
577 
578                         if(prev == NULL)
579                                 prev = rwl;
580                         else {
581                                 prev->c_next = rwl;
582                                 prev = rwl;
583                         }
584 
585                         if(*rpccall_wlist == NULL)
586                                 *rpccall_wlist = rwl;
587                 }
588                 rwl->c_next = NULL;
589         } else if (xr == x_READ3vres) {
590                 *num_segment = 1;
591                 rwl = (struct clist *)kmem_zalloc(sizeof (struct clist), 
592                                 KM_SLEEP);
593                 *rwl = empty_cl;
594 
595                 rwl->c_len = vres->data.data_len;
596                 rwl->c_daddr = (uint64)(vres->data.data_val);
597 
598                 if(*rpccall_wlist == NULL)
599                         *rpccall_wlist = rwl;
600         } else {
601                 /*cmn_err(CE_NOTE, "read3args_make_wlist: non READ3xr=%p", 
602                                 (void *)xr);*/
603         }
604 }
605 
606 /* ARGSUSED */
607 static enum clnt_stat
608 clnt_rdma_kcallit(CLIENT *h, rpcproc_t procnum, xdrproc_t xdr_args,
609     caddr_t argsp, xdrproc_t xdr_results, caddr_t resultsp, struct timeval wait)
610 {
611         cku_private_t *p = htop(h);
612         int     status;
613         XDR     *xdrs;
614         XDR     *cxdrp = NULL, callxdr; /* for xdrrdma encoding the RPC call */
615         XDR     *rxdrp = NULL, replxdr; /* for xdrrdma decoding the RPC reply */
616         struct rpc_msg  reply_msg;
617         struct clist *sendlist = NULL, *recvlist = NULL; 
618         struct clist *cl = NULL, *cle = NULL, *rdma_reply = NULL; 
619         uint_t vers, op;
620         uint_t off;
621         uint32_t xid;
622         uint32_t seg_array_len;
623         CONN *conn = NULL;
624         rdma_buf_t clmsg = {0}, rpcmsg = {0}; 
625         int msglen;
626         clock_t ticks;
627         bool_t wlist_exists_reply  = FALSE;
628         bool_t long_reply_buf_exists = FALSE;
629 
630         struct clist *rpccall_wlist = NULL, *rpcreply_wlist = NULL, 
631                      long_reply_clist ={0};
632         rpccall_read_t read_type;
633         rpccall_write_t write_type;
634         uint32_t rdma_credit = rdma_bufs_rqst;
635         struct clist long_reply_buf_clist = {0};
636 
637         RCSTAT_INCR(rccalls);
638         /*
639          * Get unique xid
640          */
641         if (p->cku_xid == 0)
642                 p->cku_xid = alloc_xid();
643 
644         status = RDMA_GET_CONN(p->cku_rd_mod->rdma_ops, &p->cku_addr,
645             p->cku_addrfmly, p->cku_rd_handle, &conn);
646 
647         if (conn == NULL) {
648                 /*
649                  * Connect failed to server. Could be because of one
650                  * of several things. In some cases we don't want
651                  * the caller to retry immediately - delay before
652                  * returning to caller.
653                  */
654                 switch (status) {
655                 case RDMA_TIMEDOUT:
656                         /*
657                          * Already timed out. No need to delay
658                          * some more.
659                          */
660                         p->cku_err.re_status = RPC_TIMEDOUT;
661                         p->cku_err.re_errno = ETIMEDOUT;
662                         break;
663                 case RDMA_INTR:
664                         /*
665                          * Failed because of an signal. Very likely
666                          * the caller will not retry.
667                          */
668                         p->cku_err.re_status = RPC_INTR;
669                         p->cku_err.re_errno = EINTR;
670                         break;
671                 default:
672                         /*
673                          * All other failures - server down or service
674                          * down or temporary resource failure. Delay before
675                          * returning to caller.
676                          */
677                         ticks = clnt_rdma_min_delay * drv_usectohz(1000000);
678                         p->cku_err.re_status = RPC_CANTCONNECT;
679                         p->cku_err.re_errno = EIO;
680 
681                         if (h->cl_nosignal == TRUE) {
682                                 delay(ticks);
683                         } else {
684                                 if (delay_sig(ticks) == EINTR) {
685                                         p->cku_err.re_status = RPC_INTR;
686                                         p->cku_err.re_errno = EINTR;
687                                 }
688                         }
689                         break;
690                 }
691 
692                 return (p->cku_err.re_status);
693         }
694 
695         clnt_check_credit(conn);
696 
697         /*
698          * Get the size of the rpc call message. Need this
699          * to determine if the rpc call message will fit in
700          * the pre-allocated RDMA buffers. If the rpc call
701          * message length is greater that the pre-allocated
702          * buffers then, it is a Long RPC. A one time use
703          * buffer is allocated and registered for the Long
704          * RPC call.
705          */
706         xdrs = &callxdr;
707         msglen = CKU_HDRSIZE + BYTES_PER_XDR_UNIT;
708 
709         if (h->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
710                 msglen += xdrrdma_authsize(h->cl_auth, p->cku_cred,
711                                 rdma_minchunk);
712                 msglen += xdrrdma_sizeof(xdr_args, argsp, rdma_minchunk);
713 
714                 if (msglen > RPC_MSG_SZ)  
715                     read_type = RPCCALL_RCHUNK; 
716                 else  
717                     read_type = RPCCALL_NORCHUNK; 












718         } else {
719                 /*


















720                  * For RPCSEC_GSS since we cannot accurately presize the
721                  * buffer required for encoding, we assume that its going
722                  * to be a Long RPC to start with. We also create the
723                  * the XDR stream with min_chunk set to 0 which instructs
724                  * the XDR layer to not chunk the incoming byte stream.
725                  */
726 
727                 msglen += 2 * MAX_AUTH_BYTES + 2 * sizeof (struct opaque_auth);
728                 msglen += xdrrdma_sizeof(xdr_args, argsp, rdma_minchunk); 
729 
730                 if (msglen > RPC_MSG_SZ) 
731                     read_type = RPCCALL_RCHUNK; 
732                 else 
733                     read_type = RPCCALL_NORCHUNK; 










734         }
735 
736         if (read_type == RPCCALL_NORCHUNK) { 





737 
738                 rpcmsg.type = SEND_BUFFER; 
739                 if (RDMA_BUF_ALLOC(conn, &rpcmsg)) { 
740                         cmn_err(CE_WARN, "clnt_rdma_kcallit: no buffers!"); 


















741                         goto done;
742                 }

743         } else {
744 #ifdef SERVER_REG_CACHE 
745                 rib_lrc_entry_t *long_reply_buf = NULL; 
746 #endif 
747                 rpcmsg.type = CHUNK_BUFFER; 
748 #ifdef SERVER_REG_CACHE 
749                  long_reply_buf = RDMA_GET_SERVER_CACHE_BUF(conn, msglen); 
750                 rpcmsg.addr =  long_reply_buf->lrc_buf; 
751 #else 
752                 rpcmsg.addr = kmem_zalloc(msglen, KM_SLEEP); 
753 #endif 
754                 cle = (struct clist *)kmem_zalloc(sizeof (struct clist), 
755                                                   KM_SLEEP); 
756                 cle->c_xdroff = 0; 
757                 cle->c_len    = rpcmsg.len = msglen; 
758                 cle->c_saddr  = (uint64)(uintptr_t)rpcmsg.addr; 
759                 cle->c_next   = NULL; 
760 #ifdef SERVER_REG_CACHE 
761                 cle->long_reply_buf  = (uint64)long_reply_buf; 
762 #endif 
763         }
764  
765         op = cle ? RDMA_NOMSG : RDMA_MSG; 
766         cxdrp = xdrs; 
767         xdrrdma_create(xdrs, rpcmsg.addr, (cle ? msglen : rpcmsg.len), 
768                        rdma_minchunk, cle, XDR_ENCODE, NULL); 
769          
770         status = clnt_compose_rpcmsg(h, procnum, &rpcmsg, xdrs, xdr_args, argsp); 
771         if (status != CLNT_RDMA_SUCCESS) { 
772             rdma_buf_free(conn, &rpcmsg); 
773             clist_free(cle);
774             p->cku_err.re_status = RPC_CANTENCODEARGS;
775             p->cku_err.re_errno = EIO;
776             cmn_err(CE_WARN,
777                     "clnt_rdma_kcallit: clnt_compose_rpcmsg failed"); 
778             goto done;
779         }








780 
781         /*  Read chunklist (a linked list of  N elements, 
782          *  position P (same P for all chunks of same arg!): 
783          *    1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0 

784          */






























785 
786         cl = xdrrdma_clist(xdrs);
787 
788         /*
789          * Update the chunk size information for the Long RPC msg.
790          */
791         if (cl && op == RDMA_NOMSG)
792                 cl->c_len = p->cku_outsz;
793 
794         /*
795          * Prepare the header for the RDMA chunk 
796          */
797         status = clnt_compose_rdma_header(conn, h, &clmsg, &xdrs, &op); 
798         if (status != CLNT_RDMA_SUCCESS) { 

799                 p->cku_err.re_status = RPC_CANTSEND;
800                 p->cku_err.re_errno = EIO;
801                 rdma_buf_free(conn, &rpcmsg);
802                 clist_free(cle);
803                 RCSTAT_INCR(rcnomem);
804                 cmn_err(CE_WARN, "clnt_rdma_kcallit: no free buffers!!");
805                 goto done;
806         }











807 
808         status = clnt_setup_rlist(conn, xdrs, &cl); 
809         if (status != CLNT_RDMA_SUCCESS) { 
810             cmn_err(CE_WARN, "clnt_rdma_kcallit: clist register failed"); 
811             rdma_buf_free(conn, &clmsg); 
812             rdma_buf_free(conn, &rpcmsg); 
813             clist_free(cl); 
814             p->cku_err.re_status = RPC_CANTSEND; 
815             p->cku_err.re_errno = EIO; 
816             goto done; 
817         } 
818 
819         /* Setup write chunk list for NFS3 READ operation  
820          * Other operations will have a NULL wlist  
821          */
822         status = clnt_setup_wlist(conn, procnum, &rpccall_wlist,  
823                                   resultsp, xdr_results, xdrs); 
824         if (status != CLNT_RDMA_SUCCESS) { 

825               rdma_buf_free(conn, &clmsg);
826               rdma_buf_free(conn, &rpcmsg);
827               clist_free(cl);
828               p->cku_err.re_status = RPC_CANTSEND;
829               p->cku_err.re_errno = EIO;
830               goto done;
831         }
832 
833         status = clnt_setup_long_reply(conn, procnum, &long_reply_buf_clist, 
834                         xdrs, &long_reply_buf_exists);
835         if (status != CLNT_RDMA_SUCCESS) {
836               rdma_buf_free(conn, &clmsg);
837               rdma_buf_free(conn, &rpcmsg);
838               clist_free(cl);
839               p->cku_err.re_status = RPC_CANTSEND;
840               p->cku_err.re_errno = EIO;
841               goto done;
842         }

843 
844         /*
845          * XDR encode the RDMA_REPLY write chunk
846          */
847         seg_array_len = (long_reply_buf_exists ? 1:0);
848         (void) xdr_encode_reply_wchunk(xdrs, &long_reply_buf_clist, seg_array_len);
849         /*
850          * Start with the RDMA header and clist (if any)
851          */
852         sendlist = NULL;
853         clist_add(&sendlist, 0, XDR_GETPOS(xdrs), &clmsg.handle,
854                 clmsg.addr, NULL, NULL);

855         /*
856          * Put the RPC call message in the send list if small RPC
857          */
858         if (op == RDMA_MSG) {
859                 clist_add(&sendlist, 0, p->cku_outsz, &rpcmsg.handle,
860                         rpcmsg.addr, NULL, NULL);
861         } else {
862                 /* Long RPC already in chunk list */
863                 RCSTAT_INCR(rclongrpcs);
864         }
865 
866         /*
867          * Set up a reply buffer ready for the reply
868          */
869         status = rdma_clnt_postrecv(conn, p->cku_xid);
870         if (status != RDMA_SUCCESS) {
871                 rdma_buf_free(conn, &clmsg);
872                 rdma_buf_free(conn, &rpcmsg);
873                 if (cl) {
874                         (void) clist_deregister(conn, cl, 1);
875                         clist_free(cl);
876                 }
877                 clist_free(sendlist);
878                 p->cku_err.re_status = RPC_CANTSEND;
879                 p->cku_err.re_errno = EIO;
880                 goto done;
881         }
882         /*
883          * sync the memory for dma
884          */
885         if (cl != NULL) {
886                 status = clist_syncmem(conn, cl, 1);
887                 if (status != RDMA_SUCCESS) {
888                         rdma_buf_free(conn, &clmsg);
889                         rdma_buf_free(conn, &rpcmsg);
890                         (void) clist_deregister(conn, cl, 1);
891                         clist_free(cl);
892                         clist_free(sendlist);
893                         p->cku_err.re_status = RPC_CANTSEND;
894                         p->cku_err.re_errno = EIO;
895                         goto done;
896                 }
897         }
898 
899         /*
900          * Send the call message to the server
901          */
902 #if defined (CLNT_INTERRUPT_COAL)
903         status = RDMA_SEND_BL(conn, sendlist, p->cku_xid);
904 #else
905         status = RDMA_SEND(conn, sendlist, p->cku_xid);
906 #endif
907         if (status != RDMA_SUCCESS) {
908                 if (cl) {
909                         (void) clist_deregister(conn, cl, 1);
910                         clist_free(cl);
911                         /*
912                          * If this was a long RPC message, need
913                          * to free that buffer.
914                          */
915                         if (rpcmsg.type == CHUNK_BUFFER)
916                                 rdma_buf_free(conn, &rpcmsg);
917                 }
918                 clist_free(sendlist);
919                 p->cku_err.re_status = RPC_CANTSEND;
920                 p->cku_err.re_errno = EIO;
921                 goto done;
922         } else {
923                 /*
924                  * RDMA plugin now owns the send msg buffers.
925                  * Clear them out and don't free them here.
926                  */
927                 clmsg.addr = NULL;
928                 if (rpcmsg.type == SEND_BUFFER)
929                         rpcmsg.addr = NULL;
930         }
931         clist_free(sendlist);





932 
933         /*
934          * Recv rpc reply
935          */
936         status = RDMA_RECV(conn, &recvlist, p->cku_xid);
937         clnt_return_credit(conn);
938 
939         /*
940          * Deregister chunks sent. Do this only after the reply
941          * is received as that is a sure indication that the
942          * remote end has completed RDMA of the chunks.
943          */
944         if (cl != NULL) {
945                 /*
946                  * Deregister the chunks
947                  */
948                 (void) clist_deregister(conn, cl, 1);
949                 clist_free(cl);
950                 /*
951                  * If long RPC free chunk
952                  */
953                 rdma_buf_free(conn, &rpcmsg);
954         }
955 
956         /*
957          * Now check recv status
958          */
959         if (status != 0) {






960                 if (status == RDMA_INTR) {
961                         p->cku_err.re_status = RPC_INTR;
962                         p->cku_err.re_errno = EINTR;
963                         RCSTAT_INCR(rcintrs);
964                 } else if (status == RPC_TIMEDOUT) {
965                         p->cku_err.re_status = RPC_TIMEDOUT;
966                         p->cku_err.re_errno = ETIMEDOUT;
967                         RCSTAT_INCR(rctimeouts);
968                 } else {
969                         p->cku_err.re_status = RPC_CANTRECV;
970                         p->cku_err.re_errno = EIO;
971                 }
972                 goto done;
973         }




974         /*
975          * Process the reply message.
976          *
977          * First the chunk list (if any)
978          */
979         xdrs = &(p->cku_inxdr);
980         xdrmem_create(xdrs, (caddr_t)(uintptr_t)recvlist->c_saddr,
981             recvlist->c_len, XDR_DECODE);
982         /*
983          * Treat xid as opaque (xid is the first entity
984          * in the rpc rdma message).
985          */
986         xid = *(uint32_t *)(uintptr_t)recvlist->c_saddr;
987         /* Skip xid and set the xdr position accordingly. */
988         XDR_SETPOS(xdrs, sizeof (uint32_t));
989         (void) xdr_u_int(xdrs, &vers);
990         (void) xdr_u_int(xdrs, &rdma_credit); 
991         (void) xdr_u_int(xdrs, &op);
992         (void) xdr_do_clist(xdrs, &cl); 
993          clnt_update_credit(conn, rdma_credit);  
994          wlist_exists_reply = FALSE; 
995          if (! xdr_decode_wlist(xdrs, &rpcreply_wlist, &wlist_exists_reply)) { 
996                  cmn_err(CE_NOTE, 
997                          "clnt_rdma_kcallit: xdr_decode_wlist failed"); 
998                  /* XXX: what should we fail with here -- EIO? */ 
999          } 
1000 #ifdef RPC_RDMA_INLINE 
1001          if (xdr_results == x_READ3vres) { 
1002                  ((READ3vres *)resultsp)->wlist = NULL; 
1003          } else if (xdr_results == x_READ3uiores) { 
1004                  ((READ3uiores *)resultsp)->wlist = NULL; 
1005          } 
1006 #endif 
1007 
1008          if (procnum == NFSPROC3_READ) { 






1009 
1010                  check_dereg_wlist(conn, rpccall_wlist); 





1011 
1012                  if (wlist_exists_reply) { 
1013                          if (xdr_results == x_READ3vres) { 
1014                                  ((READ3vres *)resultsp)->wlist = 
1015                                          rpcreply_wlist; 
1016                                  ((READ3vres *)resultsp)->wlist_len = 
1017                                          rpcreply_wlist->c_len; 
1018                          } else if (xdr_results == x_READ3uiores) { 
1019                                  ((READ3uiores *)resultsp)->wlist = 
1020                                          rpcreply_wlist; 
1021                                  ((READ3uiores *)resultsp)->wlist_len = 
1022                                          rpcreply_wlist->c_len; 
1023                          } else { 
1024                                  cmn_err(CE_NOTE, 
1025                                          "unknown READ3 xdr decode fnp=%p", 
1026                                          (void *)xdr_results); 
1027                          }



















1028                  }
1029          } else { 
1030                  if(wlist_exists_reply) 
1031                          cmn_err(CE_NOTE, 
1032                                  "clnt_rdma_kcallit: received wlist for " 
1033                                  "non-READ3 call.  reply xdr decode fnp=%p", 
1034                                  (void *)xdr_results); 






1035          }
1036 
1037          /*
1038           * The server shouldn't have sent a RDMA_SEND that 
1039           * the client needs to RDMA_WRITE a reply back to 
1040           * the server.  So silently ignoring what the 
1041           * server returns in the rdma_reply section of the 
1042           * header. 
1043           */
1044         (void) xdr_decode_reply_wchunk(xdrs, &rdma_reply,conn); 
1045         off = xdr_getpos(xdrs); 










1046 
1047         xdrs = &replxdr;
1048         if (clnt_decode_long_reply(conn, procnum, &long_reply_buf_clist,
1049                                   rdma_reply, xdrs, &rxdrp,
1050                                   cl, recvlist, op, off) != CLNT_RDMA_SUCCESS) 
1051         {
1052                 goto done;
1053         }       
1054         reply_msg.rm_direction = REPLY;
1055         reply_msg.rm_reply.rp_stat = MSG_ACCEPTED;
1056         reply_msg.acpted_rply.ar_stat = SUCCESS;
1057         reply_msg.acpted_rply.ar_verf = _null_auth;
1058         /*
1059          *  xdr_results will be done in AUTH_UNWRAP.
1060          */
1061         reply_msg.acpted_rply.ar_results.where = NULL;
1062         reply_msg.acpted_rply.ar_results.proc = xdr_void;
1063 
1064         /*
1065          * Decode and validate the response.
1066          */
1067         if (xdr_replymsg(xdrs, &reply_msg)) {
1068                 enum clnt_stat re_status;
1069 
1070                 _seterr_reply(&reply_msg, &(p->cku_err));
1071 
1072                 re_status = p->cku_err.re_status;
1073                 if (re_status == RPC_SUCCESS) {
1074                         /*
1075                          * Reply is good, check auth.
1076                          */
1077                         if (!AUTH_VALIDATE(h->cl_auth,
1078                             &reply_msg.acpted_rply.ar_verf)) {
1079                                 p->cku_err.re_status = RPC_AUTHERROR;
1080                                 p->cku_err.re_why = AUTH_INVALIDRESP;
1081                                 RCSTAT_INCR(rcbadverfs);
1082                                 cmn_err(CE_WARN,
1083                             "clnt_rdma_kcallit: AUTH_VALIDATE failed");
1084                         } else if (!AUTH_UNWRAP(h->cl_auth, xdrs,
1085                             xdr_results, resultsp)) {
1086                                 p->cku_err.re_status = RPC_CANTDECODERES;
1087                                 p->cku_err.re_errno = EIO;
1088                                 cmn_err(CE_WARN,
1089                                     "clnt_rdma_kcallit: AUTH_UNWRAP failed");
1090                         }
1091                 } else {
1092                         /* set errno in case we can't recover */
1093                         if (re_status != RPC_VERSMISMATCH &&
1094                             re_status != RPC_AUTHERROR &&
1095                             re_status != RPC_PROGVERSMISMATCH)
1096                                 p->cku_err.re_errno = EIO;
1097 
1098                         if (re_status == RPC_AUTHERROR) {
1099                                 /*
1100                                  * Map recoverable and unrecoverable
1101                                  * authentication errors to appropriate
1102                                  * errno
1103                                  */
1104                                 switch (p->cku_err.re_why) {
1105                                 case AUTH_BADCRED:
1106                                 case AUTH_BADVERF:
1107                                 case AUTH_INVALIDRESP:
1108                                 case AUTH_TOOWEAK:
1109                                 case AUTH_FAILED:
1110                                 case RPCSEC_GSS_NOCRED:
1111                                 case RPCSEC_GSS_FAILED:
1112                                         p->cku_err.re_errno = EACCES;
1113                                         break;
1114                                 case AUTH_REJECTEDCRED:
1115                                 case AUTH_REJECTEDVERF:
1116                                 default:
1117                                         p->cku_err.re_errno = EIO;
1118                                         break;
1119                                 }
1120                                 RPCLOG(1, "clnt_rdma_kcallit : "
1121                                     "authentication failed with "
1122                                     "RPC_AUTHERROR of type %d\n",
1123                                     p->cku_err.re_why);
1124                         }
1125                         cmn_err(CE_WARN,
1126                                     "clnt_rdma_kcallit: RPC failed");
1127 
1128                 }
1129         } else {
1130                 p->cku_err.re_status = RPC_CANTDECODERES;
1131                 p->cku_err.re_errno = EIO;
1132                 cmn_err(CE_WARN, "clnt_rdma_kcallit: xdr_replymsg failed");
1133         }
1134 
1135         /*
1136          * If rpc reply is in a chunk, free it now.
1137          */
1138 done: 
1139         if (long_reply_buf_exists){ 
1140                 (void) clist_deregister(conn, &long_reply_buf_clist, 0); 
1141 #ifndef SERVER_REG_CACHE 
1142                 kmem_free((void *)long_reply_buf_clist.c_daddr, 
1143                                 (size_t)long_reply_buf_clist.c_len); 
1144 #else 
1145           RDMA_FREE_SERVER_CACHE_BUF(conn, (rib_lrc_entry_t *)long_reply_buf_clist.long_reply_buf); 






















































1146 #endif

1147         }


1148         if (cxdrp)
1149                 XDR_DESTROY(cxdrp);
1150         if (rxdrp) {
1151                 (void) xdr_rpc_free_verifier(rxdrp, &reply_msg);
1152                 XDR_DESTROY(rxdrp);
1153         }
1154 
1155         if (recvlist) {
1156                 rdma_buf_t      recvmsg = {0}; 

1157                 recvmsg.addr = (caddr_t)(uintptr_t)recvlist->c_saddr;
1158                 recvmsg.type = RECV_BUFFER;
1159                 RDMA_BUF_FREE(conn, &recvmsg);
1160                 clist_free(recvlist);
1161         }
1162 #if (!defined(ASYNC_CLIENT_DEREG))
1163         if(rpccall_wlist){
1164                 kmem_free(rpccall_wlist, sizeof(clist));
1165         }
1166 #endif
1167 
1168         RDMA_REL_CONN(conn);
1169         if (p->cku_err.re_status != RPC_SUCCESS) {
1170                 RCSTAT_INCR(rcbadcalls);
1171         }
1172         return (p->cku_err.re_status);
1173 }
1174 
1175 static int clnt_decode_long_reply(CONN *conn,   rpcproc_t procnum,
1176                                 struct clist *long_reply_buf_clist, 
1177                                 struct clist *rdma_reply, XDR *xdrs,
1178                                 XDR **rxdrp, struct clist *cl, 
1179                                 struct clist *recvlist, 
1180                                 uint_t  op,uint_t off)
1181 {
1182         if ( RDMA_NOMSG == op && long_reply_buf_clist->c_daddr) {
1183                 if (procnum == NFSPROC3_READDIR ||
1184                     procnum == NFSPROC3_READDIRPLUS ||
1185                     procnum == NFSPROC3_READLINK) {
1186                         xdrmem_destroy(xdrs);
1187                         xdrrdma_create(xdrs,
1188                                         (caddr_t)long_reply_buf_clist->c_daddr,
1189                                         rdma_reply->c_len,
1190                                         0,
1191                                         NULL,
1192                                         XDR_DECODE,
1193                                         conn);
1194 
1195                         *rxdrp = xdrs;
1196                 } else {
1197                         cmn_err(CE_NOTE, "clnt_rdma_kcallit: "
1198                                         "wchunk buffer for wrong nfs proc");
1199                         xdrmem_destroy(xdrs);
1200                         *rxdrp = NULL;
1201                 }
1202         } else if (cl && RDMA_NOMSG == op) {
1203                 cmn_err(CE_NOTE, "clnt_rdma_kcallit: "
1204                                 "Server sent a READ list in the RPC Reply");
1205                 xdrmem_destroy(xdrs);
1206         } else {
1207                 xdrmem_destroy(xdrs);
1208                 xdrrdma_create(xdrs,
1209                                 (caddr_t)(uintptr_t)(recvlist->c_saddr + off),
1210                                 recvlist->c_len - off, 0, cl, XDR_DECODE, conn);
1211                 *rxdrp = xdrs;
1212         }
1213         return CLNT_RDMA_SUCCESS;
1214 }
1215 
1216 #ifdef DYNAMIC_CREDIT_CONTROL
1217 static void clnt_compute_credit(CONN *conn, uint32_t *rdma_credit)
1218 {
1219        rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc;
1220 
1221        mutex_enter(&conn->c_lock);
1222        if(cc_info->clnt_cc_granted_ops - cc_info->clnt_cc_in_flight_ops < CLNT_CREDIT_LOW)
1223                *rdma_credit = rdma_bufs_rqst + cc_info->clnt_cc_in_flight_ops / 2;
1224        mutex_exit(&conn->c_lock);
1225 }
1226 #endif
1227 
1228 static void clnt_return_credit(CONN *conn)
1229 {
1230        rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc;
1231 
1232        mutex_enter(&conn->c_lock);
1233        cc_info->clnt_cc_in_flight_ops--;
1234        cv_signal(&cc_info->clnt_cc_cv);
1235        mutex_exit(&conn->c_lock);
1236 }
1237 
1238 static void clnt_update_credit(CONN *conn, uint32_t rdma_credit)
1239 {
1240         rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc;
1241 
1242         /*
1243          * Get the granted number of buffers for credit control.
1244          */
1245         mutex_enter(&conn->c_lock);
1246         cc_info->clnt_cc_granted_ops = rdma_credit;
1247         mutex_exit(&conn->c_lock);
1248 }
1249 
1250 static void clnt_check_credit(CONN *conn)
1251 {
1252         rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc;
1253 
1254         /*
1255          * Make sure we are not going over our allowed buffer use
1256          * (and make sure we have gotten a granted value before).
1257          */
1258         mutex_enter(&conn->c_lock);
1259         while (cc_info->clnt_cc_in_flight_ops >= cc_info->clnt_cc_granted_ops
1260                         && cc_info->clnt_cc_granted_ops != 0) {
1261                 /*
1262                  * Client has maxed out its granted buffers due to
1263                  * credit control.  Current handling is to block and wait.
1264                  */
1265                 cv_wait(&cc_info->clnt_cc_cv, &conn->c_lock);
1266         }
1267         cc_info->clnt_cc_in_flight_ops++;
1268         mutex_exit(&conn->c_lock);
1269 }
1270 
1271 /* ARGSUSED */
1272 static void
1273 clnt_rdma_kabort(CLIENT *h)
1274 {
1275 }
1276 
1277 static void
1278 clnt_rdma_kerror(CLIENT *h, struct rpc_err *err)
1279 {
1280         struct cku_private *p = htop(h);
1281 
1282         *err = p->cku_err;
1283 }
1284 
1285 static bool_t
1286 clnt_rdma_kfreeres(CLIENT *h, xdrproc_t xdr_res, caddr_t res_ptr)
1287 {
1288         struct cku_private *p = htop(h);
1289         XDR *xdrs;
1290 
1291         xdrs = &(p->cku_outxdr);
1292         xdrs->x_op = XDR_FREE;
1293         return ((*xdr_res)(xdrs, res_ptr));
1294 }
1295 
1296 /* ARGSUSED */
1297 static bool_t
1298 clnt_rdma_kcontrol(CLIENT *h, int cmd, char *arg)
1299 {
1300         return (TRUE);
1301 }
1302 
1303 /* ARGSUSED */
1304 static int
1305 clnt_rdma_ksettimers(CLIENT *h, struct rpc_timers *t, struct rpc_timers *all,
1306         int minimum, void(*feedback)(int, int, caddr_t), caddr_t arg,
1307         uint32_t xid)
1308 {
1309         RCSTAT_INCR(rctimers);
1310         return (0);
1311 }
1312 
1313 int
1314 rdma_reachable(int addr_type, struct netbuf *addr, struct knetconfig **knconf)
1315 {
1316         rdma_registry_t *rp;
1317         void *handle = NULL;
1318         struct knetconfig *knc;
1319         char *pf, *p;
1320         rdma_stat status;
1321         int error = 0;
1322 
1323        mutex_enter(&rdma_modload_lock);
1324        error = rdma_modload();
1325        mutex_exit(&rdma_modload_lock);
1326 
1327         if (!INGLOBALZONE(curproc))
1328                 return (-1);
1329         /*
1330          * modload the RDMA plugins if not already done.
1331          */
1332         if (!rdma_modloaded) {
1333                 mutex_enter(&rdma_modload_lock);
1334                 if (!rdma_modloaded) {
1335                         error = rdma_modload();
1336                 }
1337                 mutex_exit(&rdma_modload_lock);
1338                 if (error)
1339                         return (-1);
1340         }
1341 
1342         if (!rdma_dev_available)
1343                 return (-1);
1344 
1345         rw_enter(&rdma_lock, RW_READER);
1346         rp = rdma_mod_head;
1347         while (rp != NULL) {
1348                 status = RDMA_REACHABLE(rp->r_mod->rdma_ops, addr_type, addr,
1349                     &handle);
1350                 if (status == RDMA_SUCCESS) {
1351                         knc = kmem_zalloc(sizeof (struct knetconfig),
1352                                 KM_SLEEP);
1353                         knc->knc_semantics = NC_TPI_RDMA;
1354                         pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
1355                         p = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
1356                         if (addr_type == AF_INET)
1357                                 (void) strncpy(pf, NC_INET, KNC_STRSIZE);
1358                         else if (addr_type == AF_INET6)
1359                                 (void) strncpy(pf, NC_INET6, KNC_STRSIZE);
1360                         pf[KNC_STRSIZE - 1] = '\0';
1361 
1362                         (void) strncpy(p, rp->r_mod->rdma_api, KNC_STRSIZE);
1363                         p[KNC_STRSIZE - 1] = '\0';
1364 
1365                         knc->knc_protofmly = pf;
1366                         knc->knc_proto = p;
1367                         knc->knc_rdev = (dev_t)handle;
1368                         *knconf = knc;
1369                         rw_exit(&rdma_lock);
1370                         return (0);
1371                 }
1372                 rp = rp->r_next;
1373         }
1374         rw_exit(&rdma_lock);
1375         return (-1);
1376 }
1377 
1378 static void
1379 check_dereg_wlist(CONN *conn, clist *rwc)
1380 {
1381         if (rwc == NULL)
1382                 return;
1383 
1384         if (rwc) {
1385                 if (rwc->c_dmemhandle.mrc_rmr && rwc->c_len) {
1386                         int status;
1387 #if defined(ASYNC_CLIENT_DEREG)
1388                         /* Add in an entry to rqueue    */
1389                         INSERT_QUEUE(conn, rwc);        
1390 #else
1391                         status = clist_deregister(conn, rwc, FALSE);
1392                         if (status != RDMA_SUCCESS) {
1393                                 cmn_err(CE_NOTE, "dereg_wlist failed."
1394                                                 "status=%d", status);
1395                         }
1396 #endif
1397                 }
1398 
1399         }
1400 }
--- EOF ---