New clnt_rdma.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28 /*
29 * Portions of this source code were derived from Berkeley
30 * 4.3 BSD under license from the Regents of the University of
31 * California.
32 */
33
/* Copyright (c) 2006, The Ohio State University. All rights reserved.
 *
 * Portions of this source code were developed by the team members of
 * The Ohio State University's Network-Based Computing Laboratory (NBCL),
 * headed by Professor Dhabaleswar K. (DK) Panda.
 *
 * Acknowledgements to contributions from developers:
 * Ranjit Noronha: noronha@cse.ohio-state.edu
 * Lei Chai : chail@cse.ohio-state.edu
 * Weikuan Yu : yuw@cse.ohio-state.edu
 *
 */
46
47 #pragma ident "@(#)clnt_rdma.c 1.10 05/07/26 SMI"
48
49 #include <sys/param.h>
50 #include <sys/types.h>
51 #include <sys/user.h>
52 #include <sys/systm.h>
53 #include <sys/sysmacros.h>
54 #include <sys/errno.h>
55 #include <sys/kmem.h>
56 #include <sys/debug.h>
57 #include <sys/systm.h>
58 #include <sys/kstat.h>
59 #include <sys/t_lock.h>
60 #include <sys/ddi.h>
61 #include <sys/cmn_err.h>
62 #include <sys/time.h>
63 #include <sys/isa_defs.h>
64 #include <sys/zone.h>
65
66 #include <rpc/types.h>
67 #include <rpc/xdr.h>
68 #include <rpc/auth.h>
69 #include <rpc/clnt.h>
70 #include <rpc/rpc_msg.h>
71 #include <rpc/rpc_rdma.h>
72 #include <nfs/nfs.h>
73
74 #define CLNT_CREDIT_LOW (5)
75
/*
 * XDR procedure pointers used to recognize the NFSv3 READ argument and
 * result encode/decode routines (compared against xdr_results in
 * clnt_rdma_kcallit / clnt_read3args_make_wlist).  Initialized to
 * NULL_xdrproc_t here; presumably assigned by the NFS client module at
 * setup time — TODO confirm where they are set.
 */
xdrproc_t x_READ3args = NULL_xdrproc_t;
xdrproc_t x_READ3res = NULL_xdrproc_t;
xdrproc_t x_READ3vres = NULL_xdrproc_t;
xdrproc_t x_READ3uiores = NULL_xdrproc_t;

/* Number of receive buffers (credits) requested in each RPC/RDMA header. */
static uint32_t rdma_bufs_rqst = RDMA_BUFS_RQST;

/* Debug/tuning flags, normally zero; not read within this file chunk. */
int rdma_wlist_verbose_debug = 0;
int rdma_wlist_memreg_debug = 0;
int rdma_wlist_clnt_debug = 0;
int rdma_wlist_svc_debug = 0;
int rdma_wlist_xdr_debug = 0;
int rdma_wlist_pglck_debug = 0;
int credit_control_debug = 0;
int rdma_long_reply_debug = 0;
int rdma_xdr_long_reply_debug = 0;

/* All-zero clist template, used to reset freshly allocated entries. */
struct clist empty_cl = {0};
94
/*
 * Forward declarations for the local helpers and the CLIENT ops
 * implemented in this file.
 */
static void clnt_read3args_make_wlist(caddr_t, struct clist **, xdrproc_t, uint_t *);
static int clnt_compose_rpcmsg(CLIENT *, rpcproc_t, rdma_buf_t *,
    XDR *, xdrproc_t, caddr_t);
static int clnt_compose_rdma_header(CONN *, CLIENT *, rdma_buf_t *,
    XDR **, uint_t *);
static int clnt_setup_rlist(CONN *, XDR *, struct clist **);
static int clnt_setup_wlist(CONN *, rpcproc_t, struct clist **,
    caddr_t, xdrproc_t, XDR *);
static int clnt_setup_long_reply(CONN *, rpcproc_t, struct clist *,
    XDR *, bool_t *);
#ifdef DYNAMIC_CREDIT_CONTROL
static void clnt_compute_credit(CONN *, uint32_t *);
#endif
static void clnt_check_credit(CONN *);
static void clnt_return_credit(CONN *);
static int clnt_decode_long_reply(CONN *, rpcproc_t, struct clist *,
    struct clist *, XDR *, XDR **, struct clist *,
    struct clist *, uint_t, uint_t);

static void clnt_update_credit(CONN *, uint32_t);
static void check_dereg_wlist(CONN *, struct clist *);

static enum clnt_stat clnt_rdma_kcallit(CLIENT *, rpcproc_t, xdrproc_t,
    caddr_t, xdrproc_t, caddr_t, struct timeval);
static void clnt_rdma_kabort(CLIENT *);
static void clnt_rdma_kerror(CLIENT *, struct rpc_err *);
static bool_t clnt_rdma_kfreeres(CLIENT *, xdrproc_t, caddr_t);
static void clnt_rdma_kdestroy(CLIENT *);
static bool_t clnt_rdma_kcontrol(CLIENT *, int, char *);
static int clnt_rdma_ksettimers(CLIENT *, struct rpc_timers *,
    struct rpc_timers *, int, void(*)(int, int, caddr_t), caddr_t, uint32_t);
126
/*
 * Operations vector for RDMA based RPC.  Installed into CLIENT handles
 * by clnt_rdma_kcreate() and clnt_rdma_kinit().
 */
static struct clnt_ops rdma_clnt_ops = {
	clnt_rdma_kcallit,	/* do rpc call */
	clnt_rdma_kabort,	/* abort call */
	clnt_rdma_kerror,	/* return error status */
	clnt_rdma_kfreeres,	/* free results */
	clnt_rdma_kdestroy,	/* destroy rpc handle */
	clnt_rdma_kcontrol,	/* the ioctl() of rpc */
	clnt_rdma_ksettimers,	/* set retry timers */
};
139
/*
 * The size of the preserialized RPC header information (bytes); the
 * rpchdr buffer carries 4 extra bytes for the procedure number in the
 * RPCSEC_GSS path.
 */
#define	CKU_HDRSIZE	20
/* Internal status codes returned by the clnt_* helper routines below. */
#define	CLNT_RDMA_SUCCESS 0
#define	CLNT_RDMA_FAIL	-99
146
/*
 * Per RPC RDMA endpoint details.  The public CLIENT handle is embedded
 * first so ptoh()/htop() can convert between the two views.
 */
typedef struct cku_private {
	CLIENT			cku_client;	/* client handle */
	rdma_mod_t		*cku_rd_mod;	/* underlying RDMA mod */
	void			*cku_rd_handle;	/* underlying RDMA device */
	struct netbuf		cku_addr;	/* remote netbuf address */
	int			cku_addrfmly;	/* for finding addr_type */
	struct rpc_err		cku_err;	/* error status */
	struct cred		*cku_cred;	/* credentials */
	XDR			cku_outxdr;	/* xdr stream for output */
	uint32_t		cku_outsz;	/* encoded call message length */
	XDR			cku_inxdr;	/* xdr stream for input */
	char			cku_rpchdr[CKU_HDRSIZE+4]; /* rpc header (+4 for GSS proc) */
	uint32_t		cku_xid;	/* current XID */
} cku_private_t;
164
#define	CLNT_RDMA_DELAY	10	/* secs to delay after a connection failure */
static int clnt_rdma_min_delay = CLNT_RDMA_DELAY;

/*
 * Client-side RPC/RDMA statistics, exported as named kstats via
 * rdmarcstat_ptr/rdmarcstat_ndata below.  Bumped with RCSTAT_INCR().
 */
struct {
	kstat_named_t	rccalls;
	kstat_named_t	rcbadcalls;
	kstat_named_t	rcbadxids;
	kstat_named_t	rctimeouts;
	kstat_named_t	rcnewcreds;
	kstat_named_t	rcbadverfs;
	kstat_named_t	rctimers;
	kstat_named_t	rccantconn;
	kstat_named_t	rcnomem;
	kstat_named_t	rcintrs;
	kstat_named_t	rclongrpcs;
} rdmarcstat = {
	{ "calls",	KSTAT_DATA_UINT64 },
	{ "badcalls",	KSTAT_DATA_UINT64 },
	{ "badxids",	KSTAT_DATA_UINT64 },
	{ "timeouts",	KSTAT_DATA_UINT64 },
	{ "newcreds",	KSTAT_DATA_UINT64 },
	{ "badverfs",	KSTAT_DATA_UINT64 },
	{ "timers",	KSTAT_DATA_UINT64 },
	{ "cantconn",	KSTAT_DATA_UINT64 },
	{ "nomem",	KSTAT_DATA_UINT64 },
	{ "interrupts", KSTAT_DATA_UINT64 },
	{ "longrpc",	KSTAT_DATA_UINT64 }
};

/* Flat view of the stats for kstat registration by the RPC framework. */
kstat_named_t *rdmarcstat_ptr = (kstat_named_t *)&rdmarcstat;
uint_t rdmarcstat_ndata = sizeof (rdmarcstat) / sizeof (kstat_named_t);
196
#ifdef DEBUG
int rdma_clnt_debug = 0;	/* debug flag; not read within this chunk */
#endif
200
#ifdef accurate_stats
extern kmutex_t rdmarcstat_lock;	/* mutex for rcstat updates */

/*
 * Bump one rdmarcstat counter.  Wrapped in do { } while (0) so the
 * macro expands to exactly one statement and is safe to use in an
 * unbraced if/else body (the original multi-statement form would
 * silently execute only its first statement conditionally).
 */
#define	RCSTAT_INCR(x)	\
	do { \
		mutex_enter(&rdmarcstat_lock); \
		rdmarcstat.x.value.ui64++; \
		mutex_exit(&rdmarcstat_lock); \
	} while (0)
#else
/* Lock-free variant: counters may be slightly inaccurate under races. */
#define	RCSTAT_INCR(x)	\
	do { \
		rdmarcstat.x.value.ui64++; \
	} while (0)
#endif
212
/* Convert between the public CLIENT handle and the private endpoint state. */
#define	ptoh(p)		(&((p)->cku_client))
#define	htop(h)		((cku_private_t *)((h)->cl_private))
215
216 int
217 clnt_rdma_kcreate(char *proto, void *handle, struct netbuf *raddr, int family,
218 rpcprog_t pgm, rpcvers_t vers, struct cred *cred, CLIENT **cl)
219 {
220 CLIENT *h;
221 struct cku_private *p;
222 struct rpc_msg call_msg;
223 rdma_registry_t *rp;
224
225 ASSERT(INGLOBALZONE(curproc));
226
227 if (cl == NULL)
228 return (EINVAL);
229 *cl = NULL;
230
231 p = kmem_zalloc(sizeof (*p), KM_SLEEP);
232
233 /*
234 * Find underlying RDMATF plugin
235 */
236 rw_enter(&rdma_lock, RW_READER);
237 rp = rdma_mod_head;
238 while (rp != NULL) {
239 if (strcmp(rp->r_mod->rdma_api, proto))
240 rp = rp->r_next;
241 else {
242 p->cku_rd_mod = rp->r_mod;
243 p->cku_rd_handle = handle;
244 break;
245 }
246 }
247 rw_exit(&rdma_lock);
248
249 if (p->cku_rd_mod == NULL) {
250 /*
251 * Should not happen.
252 * No matching RDMATF plugin.
253 */
254 kmem_free(p, sizeof (struct cku_private));
255 return (EINVAL);
256 }
257
258 h = ptoh(p);
259 h->cl_ops = &rdma_clnt_ops;
260 h->cl_private = (caddr_t)p;
261 h->cl_auth = authkern_create();
262
263 /* call message, just used to pre-serialize below */
264 call_msg.rm_xid = 0;
265 call_msg.rm_direction = CALL;
266 call_msg.rm_call.cb_rpcvers = RPC_MSG_VERSION;
267 call_msg.rm_call.cb_prog = pgm;
268 call_msg.rm_call.cb_vers = vers;
269
270 xdrmem_create(&p->cku_outxdr, p->cku_rpchdr, CKU_HDRSIZE, XDR_ENCODE);
271 /* pre-serialize call message header */
272 if (!xdr_callhdr(&p->cku_outxdr, &call_msg)) {
273 XDR_DESTROY(&p->cku_outxdr);
274 auth_destroy(h->cl_auth);
275 kmem_free(p, sizeof (struct cku_private));
276 return (EINVAL);
277 }
278
279 /*
280 * Set up the rpc information
281 */
282 p->cku_cred = cred;
283 p->cku_addr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP);
284 p->cku_addr.maxlen = raddr->maxlen;
285 p->cku_addr.len = raddr->len;
286 bcopy(raddr->buf, p->cku_addr.buf, raddr->len);
287 p->cku_addrfmly = family;
288
289 *cl = h;
290 return (0);
291 }
292
293 static void
294 clnt_rdma_kdestroy(CLIENT *h)
295 {
296 struct cku_private *p = htop(h);
297
298 kmem_free(p->cku_addr.buf, p->cku_addr.maxlen);
299 kmem_free(p, sizeof (*p));
300 }
301
302 void
303 clnt_rdma_kinit(CLIENT *h, char *proto, void *handle, struct netbuf *raddr,
304 struct cred *cred)
305 {
306 struct cku_private *p = htop(h);
307 rdma_registry_t *rp;
308
309 ASSERT(INGLOBALZONE(curproc));
310 /*
311 * Find underlying RDMATF plugin
312 */
313 p->cku_rd_mod = NULL;
314 rw_enter(&rdma_lock, RW_READER);
315 rp = rdma_mod_head;
316 while (rp != NULL) {
317 if (strcmp(rp->r_mod->rdma_api, proto))
318 rp = rp->r_next;
319 else {
320 p->cku_rd_mod = rp->r_mod;
321 p->cku_rd_handle = handle;
322 break;
323 }
324
325 }
326 rw_exit(&rdma_lock);
327
328 /*
329 * Set up the rpc information
330 */
331 p->cku_cred = cred;
332 p->cku_xid = 0;
333
334 if (p->cku_addr.maxlen < raddr->len) {
335 if (p->cku_addr.maxlen != 0 && p->cku_addr.buf != NULL)
336 kmem_free(p->cku_addr.buf, p->cku_addr.maxlen);
337 p->cku_addr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP);
338 p->cku_addr.maxlen = raddr->maxlen;
339 }
340
341 p->cku_addr.len = raddr->len;
342 bcopy(raddr->buf, p->cku_addr.buf, raddr->len);
343 h->cl_ops = &rdma_clnt_ops;
344 }
345
/*
 * Serialize one complete RPC call (header, credentials, procedure
 * number, and arguments) into the call buffer "rpcmsg" through the
 * already-created xdrrdma stream "xdrs".
 *
 * Non-GSS flavors reuse the header preserialized by clnt_rdma_kcreate
 * (cku_rpchdr); RPCSEC_GSS wraps the header+proc and the arguments via
 * AUTH_WRAP instead.  On success cku_outsz holds the encoded length.
 * Returns CLNT_RDMA_SUCCESS or CLNT_RDMA_FAIL.
 */
static int clnt_compose_rpcmsg(CLIENT *h, rpcproc_t procnum,
    rdma_buf_t *rpcmsg, XDR *xdrs,
    xdrproc_t xdr_args, caddr_t argsp)
{
	cku_private_t *p = htop(h);

	if (h->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
		/*
		 * Copy in the preserialized RPC header
		 * information.
		 */
		bcopy(p->cku_rpchdr, rpcmsg->addr, CKU_HDRSIZE);

		/*
		 * transaction id is the 1st thing in the output
		 * buffer.
		 */
		/* LINTED pointer alignment */
		(*(uint32_t *)(rpcmsg->addr)) = p->cku_xid;

		/* Skip the preserialized stuff. */
		XDR_SETPOS(xdrs, CKU_HDRSIZE);

		/* Serialize dynamic stuff into the output buffer. */
		if ((!XDR_PUTINT32(xdrs, (int32_t *)&procnum)) ||
		    (!AUTH_MARSHALL(h->cl_auth, xdrs, p->cku_cred)) ||
		    (!(*xdr_args)(xdrs, argsp))) {
			cmn_err(CE_WARN, "Failed to serialize dynamic arguments\n");
			return CLNT_RDMA_FAIL;
		}
		p->cku_outsz = XDR_GETPOS(xdrs);
	} else {
		/* Append the procedure number after the preserialized header. */
		uint32_t *uproc = (uint32_t *)&p->cku_rpchdr[CKU_HDRSIZE];
		IXDR_PUT_U_INT32(uproc, procnum);
		(*(uint32_t *)(&p->cku_rpchdr[0])) = p->cku_xid;
		XDR_SETPOS(xdrs, 0);

		/* Serialize the procedure number and the arguments. */
		if (!AUTH_WRAP(h->cl_auth, (caddr_t)p->cku_rpchdr,
		    CKU_HDRSIZE+4, xdrs, NULL, NULL) ||
		    !(*xdr_args)(xdrs, argsp)) {
			/*
			 * Even on failure, capture any buffer xdrrdma
			 * reallocated while encoding so the caller frees
			 * the right one.
			 */
			if (rpcmsg->addr != xdrs->x_base) {
				rpcmsg->addr = xdrs->x_base;
				rpcmsg->len = xdr_getbufsize(xdrs);
			}
			cmn_err(CE_WARN, "Failed to serialize procedure number and the arguments.\n");
			return CLNT_RDMA_FAIL;
		}
		/*
		 * If we had to allocate a new buffer while encoding
		 * then update the addr and len.
		 */
		if (rpcmsg->addr != xdrs->x_base) {
			rpcmsg->addr = xdrs->x_base;
			rpcmsg->len = xdr_getbufsize(xdrs);
		}

		p->cku_outsz = XDR_GETPOS(xdrs);
	}

	return CLNT_RDMA_SUCCESS;
}
408
409 static int clnt_compose_rdma_header(CONN *conn, CLIENT *h, rdma_buf_t *clmsg,
410 XDR **xdrs, uint_t *op)
411 {
412 cku_private_t *p = htop(h);
413 uint_t vers;
414 uint32_t rdma_credit = rdma_bufs_rqst;
415
416 vers = RPCRDMA_VERS;
417 clmsg->type = SEND_BUFFER;
418
419 #ifdef DYNAMIC_CREDIT_CONTROL
420 clnt_compute_credit(conn, &rdma_credit);
421 #endif
422
423 if (RDMA_BUF_ALLOC(conn, clmsg)) {
424 return CLNT_RDMA_FAIL;
425 }
426
427 *xdrs = &p->cku_outxdr;
428 xdrmem_create(*xdrs, clmsg->addr, clmsg->len, XDR_ENCODE);
429
430 (*(uint32_t *)clmsg->addr) = p->cku_xid;
431 XDR_SETPOS(*xdrs, sizeof (uint32_t));
432 (void) xdr_u_int(*xdrs, &vers);
433 (void) xdr_u_int(*xdrs, &rdma_credit);
434 (void) xdr_u_int(*xdrs, op);
435
436 return CLNT_RDMA_SUCCESS;
437 }
438
439 static int clnt_setup_rlist(CONN *conn, XDR *xdrs, struct clist **cl)
440 {
441 int ret;
442
443 if (*cl != NULL) {
444 ret = clist_register(conn, *cl, 1);
445 if (ret != RDMA_SUCCESS) {
446 return CLNT_RDMA_FAIL;
447 }
448 }
449 (void) xdr_do_clist(xdrs, cl);
450
451 return CLNT_RDMA_SUCCESS;
452 }
453
/*
 * Build, register, and encode the write chunk list for the call.
 * Only NFSPROC3_READ gets a wlist (built from the caller's reply
 * buffers by clnt_read3args_make_wlist); all other procedures encode
 * a NULL wlist.  Returns CLNT_RDMA_SUCCESS or CLNT_RDMA_FAIL.
 *
 * NOTE(review): on clist_register failure the freshly built
 * *rpccall_wlist is returned still allocated; the caller's cleanup
 * frees only the head element — presumably multi-segment lists leak
 * their tails here.  Confirm against clist_free()/check_dereg_wlist().
 */
static int clnt_setup_wlist(CONN *conn, rpcproc_t procnum,
    struct clist **rpccall_wlist, caddr_t resultsp,
    xdrproc_t xdr_results, XDR *xdrs)
{
	int status;
	uint_t num_segment = 0;

	if (procnum == NFSPROC3_READ) {
		clnt_read3args_make_wlist(resultsp, rpccall_wlist,
		    xdr_results, &num_segment);
		status = clist_register(conn, *rpccall_wlist, 0);
		if (status != RDMA_SUCCESS)
			return CLNT_RDMA_FAIL;
	} else {
		*rpccall_wlist = NULL;
	}

	/* Encode the wlist (or its absence) into the transport header. */
	if (! xdr_encode_wlist(xdrs, *rpccall_wlist, num_segment))
		return CLNT_RDMA_FAIL;

	return CLNT_RDMA_SUCCESS;
}
476
/*
 * For reply-heavy procedures (READDIR, READDIRPLUS, READLINK), set up
 * a registered buffer the server can RDMA-write a long reply into,
 * described by *lrc_clist.  *exists is set TRUE only when such a
 * buffer was actually set up.  With SERVER_REG_CACHE the buffer comes
 * from the registration cache; otherwise it is kmem_alloc'd and
 * registered per call.  Returns CLNT_RDMA_SUCCESS or CLNT_RDMA_FAIL.
 */
static int clnt_setup_long_reply(CONN *conn, rpcproc_t procnum,
    struct clist *lrc_clist,
    XDR *xdrs, bool_t *exists)
{
	int status;
	caddr_t addr;
#ifdef SERVER_REG_CACHE
	rib_lrc_entry_t *long_reply_buf = NULL;
#endif
	*exists = FALSE;
	lrc_clist->c_daddr = NULL;

#ifdef RPC_RDMA_INLINE
	/* Small replies travel inline; no long-reply buffer needed. */
	if (lrc_clist->c_len < rdma_minchunk)
		return CLNT_RDMA_SUCCESS;
#endif

	if (procnum == NFSPROC3_READDIR ||
	    procnum == NFSPROC3_READDIRPLUS ||
	    procnum == NFSPROC3_READLINK) {
#ifndef SERVER_REG_CACHE
		addr = kmem_alloc(LONG_REPLY_LEN, KM_SLEEP);
		bzero(addr, LONG_REPLY_LEN);
		lrc_clist->c_daddr = (uint64)addr;
		lrc_clist->c_len = LONG_REPLY_LEN;
		lrc_clist->c_next = NULL;
		lrc_clist->long_reply_buf = NULL;
		status = clist_register(conn, lrc_clist, 0);
#else
		/*
		 * NOTE(review): RDMA_GET_SERVER_CACHE_BUF's result is
		 * dereferenced without a NULL check — confirm the cache
		 * cannot fail to return a buffer.
		 */
		long_reply_buf = RDMA_GET_SERVER_CACHE_BUF(conn, LONG_REPLY_LEN);
		bzero(long_reply_buf->lrc_buf, LONG_REPLY_LEN);
		lrc_clist->c_daddr = (uint64)long_reply_buf->lrc_buf;
		lrc_clist->c_len = LONG_REPLY_LEN;
		lrc_clist->c_next = NULL;
		lrc_clist->long_reply_buf = (uint64)long_reply_buf;
		lrc_clist->c_dmemhandle = long_reply_buf->lrc_mhandle;
		status = clist_register(conn, lrc_clist, 0);
#endif
		if (status) {
			cmn_err(CE_WARN, "clnt_setup_long_reply: cannot register buffer");
#ifndef SERVER_REG_CACHE
			kmem_free((void*)addr, (size_t)LONG_REPLY_LEN);
#else
			RDMA_FREE_SERVER_CACHE_BUF(conn, (rib_lrc_entry_t *)long_reply_buf);

#endif
			lrc_clist->c_daddr = NULL;
			return CLNT_RDMA_FAIL;
		}
		*exists = TRUE;
	}

	return CLNT_RDMA_SUCCESS;
}
531
532 static void
533 clnt_read3args_make_wlist(caddr_t replyp, struct clist **rpccall_wlist,
534 xdrproc_t xr, uint_t *num_segment)
535 {
536 READ3uiores *ures = (READ3uiores *)replyp;
537 READ3vres *vres = (READ3vres *)replyp;
538 struct clist *rwl = NULL, *prev = NULL;
539 int i, total_length;
540
541 *rpccall_wlist = NULL;
542
543 #ifdef RPC_RDMA_INLINE
544 if (xr == x_READ3uiores) {
545 total_length = 0;
546 for(i=0; i<ures->uiop->uio_iovcnt; i++) {
547 total_length += ures->uiop->uio_iov[i].iov_len;
548 }
549 } else {
550 total_length = vres->data.data_len;
551 }
552
553 if (total_length < rdma_minchunk)
554 return;
555 #endif
556
557 /* XXX: fake a chunk threshold for the combined length for now */
558 if (xr == x_READ3uiores) {
559 *num_segment = ures->uiop->uio_iovcnt;
560 for(i=0; i<ures->uiop->uio_iovcnt; i++) {
561 rwl = (struct clist *)kmem_zalloc(sizeof(struct clist),
562 KM_SLEEP);
563
564 rwl->c_len = ures->uiop->uio_iov[i].iov_len;
565 rwl->c_daddr = (uint64)(ures->uiop->uio_iov[i].iov_base);
566 /*
567 * if userspace address, put adspace ptr in clist.
568 * If not, then do nothing since it's already
569 * set to NULL (from empty_cl)
570 */
571 if (ures->uiop->uio_segflg == UIO_USERSPACE) {
572 int error;
573 rwl->c_adspc = ttoproc(curthread)->p_as;
574 } else {
575 rwl->c_dpplist = (page_t **)NULL;
576 }
577
578 if(prev == NULL)
579 prev = rwl;
580 else {
581 prev->c_next = rwl;
582 prev = rwl;
583 }
584
585 if(*rpccall_wlist == NULL)
586 *rpccall_wlist = rwl;
587 }
588 rwl->c_next = NULL;
589 } else if (xr == x_READ3vres) {
590 *num_segment = 1;
591 rwl = (struct clist *)kmem_zalloc(sizeof (struct clist),
592 KM_SLEEP);
593 *rwl = empty_cl;
594
595 rwl->c_len = vres->data.data_len;
596 rwl->c_daddr = (uint64)(vres->data.data_val);
597
598 if(*rpccall_wlist == NULL)
599 *rpccall_wlist = rwl;
600 } else {
601 /*cmn_err(CE_NOTE, "read3args_make_wlist: non READ3xr=%p",
602 (void *)xr);*/
603 }
604 }
605
/*
 * Issue one RPC over RDMA and decode its reply.
 *
 * Outline: obtain a connection, size the call to choose inline
 * (RDMA_MSG) vs. read-chunk (RDMA_NOMSG) transfer, serialize the call,
 * register chunk/write/long-reply lists, post a receive, send, wait
 * for the reply, then decode the transport header, wlist, long-reply
 * chunk, and finally the RPC reply itself through the auth layer.
 * The "wait" timeout parameter is unused (ARGSUSED).
 *
 * All failure paths funnel through "done:" which releases buffers,
 * XDR streams, the receive list, the wlist head, and the connection,
 * and returns p->cku_err.re_status.
 */
/* ARGSUSED */
static enum clnt_stat
clnt_rdma_kcallit(CLIENT *h, rpcproc_t procnum, xdrproc_t xdr_args,
    caddr_t argsp, xdrproc_t xdr_results, caddr_t resultsp, struct timeval wait)
{
	cku_private_t *p = htop(h);
	int status;
	XDR *xdrs;
	XDR *cxdrp = NULL, callxdr; /* for xdrrdma encoding the RPC call */
	XDR *rxdrp = NULL, replxdr; /* for xdrrdma decoding the RPC reply */
	struct rpc_msg reply_msg;
	struct clist *sendlist = NULL, *recvlist = NULL;
	struct clist *cl = NULL, *cle = NULL, *rdma_reply = NULL;
	uint_t vers, op;
	uint_t off;
	uint32_t xid;
	uint32_t seg_array_len;
	CONN *conn = NULL;
	rdma_buf_t clmsg = {0}, rpcmsg = {0};
	int msglen;
	clock_t ticks;
	bool_t wlist_exists_reply = FALSE;
	bool_t long_reply_buf_exists = FALSE;

	struct clist *rpccall_wlist = NULL, *rpcreply_wlist = NULL,
	    long_reply_clist = {0};
	rpccall_read_t read_type;
	rpccall_write_t write_type;	/* NOTE(review): never assigned or read here */
	uint32_t rdma_credit = rdma_bufs_rqst;
	struct clist long_reply_buf_clist = {0};

	RCSTAT_INCR(rccalls);
	/*
	 * Get unique xid
	 */
	if (p->cku_xid == 0)
		p->cku_xid = alloc_xid();

	status = RDMA_GET_CONN(p->cku_rd_mod->rdma_ops, &p->cku_addr,
	    p->cku_addrfmly, p->cku_rd_handle, &conn);

	if (conn == NULL) {
		/*
		 * Connect failed to server. Could be because of one
		 * of several things. In some cases we don't want
		 * the caller to retry immediately - delay before
		 * returning to caller.
		 */
		switch (status) {
		case RDMA_TIMEDOUT:
			/*
			 * Already timed out. No need to delay
			 * some more.
			 */
			p->cku_err.re_status = RPC_TIMEDOUT;
			p->cku_err.re_errno = ETIMEDOUT;
			break;
		case RDMA_INTR:
			/*
			 * Failed because of an signal. Very likely
			 * the caller will not retry.
			 */
			p->cku_err.re_status = RPC_INTR;
			p->cku_err.re_errno = EINTR;
			break;
		default:
			/*
			 * All other failures - server down or service
			 * down or temporary resource failure. Delay before
			 * returning to caller.
			 */
			ticks = clnt_rdma_min_delay * drv_usectohz(1000000);
			p->cku_err.re_status = RPC_CANTCONNECT;
			p->cku_err.re_errno = EIO;

			if (h->cl_nosignal == TRUE) {
				delay(ticks);
			} else {
				if (delay_sig(ticks) == EINTR) {
					p->cku_err.re_status = RPC_INTR;
					p->cku_err.re_errno = EINTR;
				}
			}
			break;
		}

		return (p->cku_err.re_status);
	}

	/* May block until the server has granted us send credits. */
	clnt_check_credit(conn);

	/*
	 * Get the size of the rpc call message. Need this
	 * to determine if the rpc call message will fit in
	 * the pre-allocated RDMA buffers. If the rpc call
	 * message length is greater that the pre-allocated
	 * buffers then, it is a Long RPC. A one time use
	 * buffer is allocated and registered for the Long
	 * RPC call.
	 */
	xdrs = &callxdr;
	msglen = CKU_HDRSIZE + BYTES_PER_XDR_UNIT;

	if (h->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
		msglen += xdrrdma_authsize(h->cl_auth, p->cku_cred,
		    rdma_minchunk);
		msglen += xdrrdma_sizeof(xdr_args, argsp, rdma_minchunk);

		if (msglen > RPC_MSG_SZ)
			read_type = RPCCALL_RCHUNK;
		else
			read_type = RPCCALL_NORCHUNK;
	} else {
		/*
		 * For RPCSEC_GSS since we cannot accurately presize the
		 * buffer required for encoding, we assume that its going
		 * to be a Long RPC to start with. We also create the
		 * the XDR stream with min_chunk set to 0 which instructs
		 * the XDR layer to not chunk the incoming byte stream.
		 */

		msglen += 2 * MAX_AUTH_BYTES + 2 * sizeof (struct opaque_auth);
		msglen += xdrrdma_sizeof(xdr_args, argsp, rdma_minchunk);

		if (msglen > RPC_MSG_SZ)
			read_type = RPCCALL_RCHUNK;
		else
			read_type = RPCCALL_NORCHUNK;
	}

	if (read_type == RPCCALL_NORCHUNK) {
		/* Small call: it fits in a pre-registered SEND buffer. */
		rpcmsg.type = SEND_BUFFER;
		if (RDMA_BUF_ALLOC(conn, &rpcmsg)) {
			cmn_err(CE_WARN, "clnt_rdma_kcallit: no buffers!");
			goto done;
		}
	} else {
		/* Long call: allocate a one-time chunk buffer + clist entry. */
#ifdef SERVER_REG_CACHE
		rib_lrc_entry_t *long_reply_buf = NULL;
#endif
		rpcmsg.type = CHUNK_BUFFER;
#ifdef SERVER_REG_CACHE
		long_reply_buf = RDMA_GET_SERVER_CACHE_BUF(conn, msglen);
		rpcmsg.addr = long_reply_buf->lrc_buf;
#else
		rpcmsg.addr = kmem_zalloc(msglen, KM_SLEEP);
#endif
		cle = (struct clist *)kmem_zalloc(sizeof (struct clist),
		    KM_SLEEP);
		cle->c_xdroff = 0;
		cle->c_len = rpcmsg.len = msglen;
		cle->c_saddr = (uint64)(uintptr_t)rpcmsg.addr;
		cle->c_next = NULL;
#ifdef SERVER_REG_CACHE
		cle->long_reply_buf = (uint64)long_reply_buf;
#endif
	}

	/* cle != NULL means the call body travels as a read chunk. */
	op = cle ? RDMA_NOMSG : RDMA_MSG;
	cxdrp = xdrs;
	xdrrdma_create(xdrs, rpcmsg.addr, (cle ? msglen : rpcmsg.len),
	    rdma_minchunk, cle, XDR_ENCODE, NULL);

	status = clnt_compose_rpcmsg(h, procnum, &rpcmsg, xdrs, xdr_args, argsp);
	if (status != CLNT_RDMA_SUCCESS) {
		rdma_buf_free(conn, &rpcmsg);
		clist_free(cle);
		p->cku_err.re_status = RPC_CANTENCODEARGS;
		p->cku_err.re_errno = EIO;
		cmn_err(CE_WARN,
		    "clnt_rdma_kcallit: clnt_compose_rpcmsg failed");
		goto done;
	}

	/* Read chunklist (a linked list of N elements,
	 * position P (same P for all chunks of same arg!):
	 * 1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0
	 */

	cl = xdrrdma_clist(xdrs);

	/*
	 * Update the chunk size information for the Long RPC msg.
	 */
	if (cl && op == RDMA_NOMSG)
		cl->c_len = p->cku_outsz;

	/*
	 * Prepare the header for the RDMA chunk
	 */
	status = clnt_compose_rdma_header(conn, h, &clmsg, &xdrs, &op);
	if (status != CLNT_RDMA_SUCCESS) {
		p->cku_err.re_status = RPC_CANTSEND;
		p->cku_err.re_errno = EIO;
		rdma_buf_free(conn, &rpcmsg);
		clist_free(cle);
		RCSTAT_INCR(rcnomem);
		cmn_err(CE_WARN, "clnt_rdma_kcallit: no free buffers!!");
		goto done;
	}

	/* Register and encode the read chunk list into the header. */
	status = clnt_setup_rlist(conn, xdrs, &cl);
	if (status != CLNT_RDMA_SUCCESS) {
		cmn_err(CE_WARN, "clnt_rdma_kcallit: clist register failed");
		rdma_buf_free(conn, &clmsg);
		rdma_buf_free(conn, &rpcmsg);
		clist_free(cl);
		p->cku_err.re_status = RPC_CANTSEND;
		p->cku_err.re_errno = EIO;
		goto done;
	}

	/* Setup write chunk list for NFS3 READ operation
	 * Other operations will have a NULL wlist
	 */
	status = clnt_setup_wlist(conn, procnum, &rpccall_wlist,
	    resultsp, xdr_results, xdrs);
	if (status != CLNT_RDMA_SUCCESS) {
		rdma_buf_free(conn, &clmsg);
		rdma_buf_free(conn, &rpcmsg);
		clist_free(cl);
		p->cku_err.re_status = RPC_CANTSEND;
		p->cku_err.re_errno = EIO;
		goto done;
	}

	/* Set up a registered buffer for long replies (READDIR & co). */
	status = clnt_setup_long_reply(conn, procnum, &long_reply_buf_clist,
	    xdrs, &long_reply_buf_exists);
	if (status != CLNT_RDMA_SUCCESS) {
		rdma_buf_free(conn, &clmsg);
		rdma_buf_free(conn, &rpcmsg);
		clist_free(cl);
		p->cku_err.re_status = RPC_CANTSEND;
		p->cku_err.re_errno = EIO;
		goto done;
	}

	/*
	 * XDR encode the RDMA_REPLY write chunk
	 */
	seg_array_len = (long_reply_buf_exists ? 1:0);
	(void) xdr_encode_reply_wchunk(xdrs, &long_reply_buf_clist, seg_array_len);
	/*
	 * Start with the RDMA header and clist (if any)
	 */
	sendlist = NULL;
	clist_add(&sendlist, 0, XDR_GETPOS(xdrs), &clmsg.handle,
	    clmsg.addr, NULL, NULL);
	/*
	 * Put the RPC call message in the send list if small RPC
	 */
	if (op == RDMA_MSG) {
		clist_add(&sendlist, 0, p->cku_outsz, &rpcmsg.handle,
		    rpcmsg.addr, NULL, NULL);
	} else {
		/* Long RPC already in chunk list */
		RCSTAT_INCR(rclongrpcs);
	}

	/*
	 * Set up a reply buffer ready for the reply
	 */
	status = rdma_clnt_postrecv(conn, p->cku_xid);
	if (status != RDMA_SUCCESS) {
		rdma_buf_free(conn, &clmsg);
		rdma_buf_free(conn, &rpcmsg);
		if (cl) {
			(void) clist_deregister(conn, cl, 1);
			clist_free(cl);
		}
		clist_free(sendlist);
		p->cku_err.re_status = RPC_CANTSEND;
		p->cku_err.re_errno = EIO;
		goto done;
	}
	/*
	 * sync the memory for dma
	 */
	if (cl != NULL) {
		status = clist_syncmem(conn, cl, 1);
		if (status != RDMA_SUCCESS) {
			rdma_buf_free(conn, &clmsg);
			rdma_buf_free(conn, &rpcmsg);
			(void) clist_deregister(conn, cl, 1);
			clist_free(cl);
			clist_free(sendlist);
			p->cku_err.re_status = RPC_CANTSEND;
			p->cku_err.re_errno = EIO;
			goto done;
		}
	}

	/*
	 * Send the call message to the server
	 */
#if defined (CLNT_INTERRUPT_COAL)
	status = RDMA_SEND_BL(conn, sendlist, p->cku_xid);
#else
	status = RDMA_SEND(conn, sendlist, p->cku_xid);
#endif
	if (status != RDMA_SUCCESS) {
		if (cl) {
			(void) clist_deregister(conn, cl, 1);
			clist_free(cl);
			/*
			 * If this was a long RPC message, need
			 * to free that buffer.
			 */
			if (rpcmsg.type == CHUNK_BUFFER)
				rdma_buf_free(conn, &rpcmsg);
		}
		clist_free(sendlist);
		p->cku_err.re_status = RPC_CANTSEND;
		p->cku_err.re_errno = EIO;
		goto done;
	} else {
		/*
		 * RDMA plugin now owns the send msg buffers.
		 * Clear them out and don't free them here.
		 */
		clmsg.addr = NULL;
		if (rpcmsg.type == SEND_BUFFER)
			rpcmsg.addr = NULL;
	}
	clist_free(sendlist);

	/*
	 * Recv rpc reply
	 */
	status = RDMA_RECV(conn, &recvlist, p->cku_xid);
	/* Give our send credit back regardless of the recv outcome. */
	clnt_return_credit(conn);

	/*
	 * Deregister chunks sent. Do this only after the reply
	 * is received as that is a sure indication that the
	 * remote end has completed RDMA of the chunks.
	 */
	if (cl != NULL) {
		/*
		 * Deregister the chunks
		 */
		(void) clist_deregister(conn, cl, 1);
		clist_free(cl);
		/*
		 * If long RPC free chunk
		 */
		rdma_buf_free(conn, &rpcmsg);
	}

	/*
	 * Now check recv status
	 */
	if (status != 0) {
		if (status == RDMA_INTR) {
			p->cku_err.re_status = RPC_INTR;
			p->cku_err.re_errno = EINTR;
			RCSTAT_INCR(rcintrs);
		} else if (status == RPC_TIMEDOUT) {
			/*
			 * NOTE(review): "status" is an RDMA transport
			 * status, yet it is compared against the RPC
			 * enum RPC_TIMEDOUT — presumably this should be
			 * RDMA_TIMEDOUT; confirm against the rdma_stat
			 * definitions.
			 */
			p->cku_err.re_status = RPC_TIMEDOUT;
			p->cku_err.re_errno = ETIMEDOUT;
			RCSTAT_INCR(rctimeouts);
		} else {
			p->cku_err.re_status = RPC_CANTRECV;
			p->cku_err.re_errno = EIO;
		}
		goto done;
	}
	/*
	 * Process the reply message.
	 *
	 * First the chunk list (if any)
	 */
	xdrs = &(p->cku_inxdr);
	xdrmem_create(xdrs, (caddr_t)(uintptr_t)recvlist->c_saddr,
	    recvlist->c_len, XDR_DECODE);
	/*
	 * Treat xid as opaque (xid is the first entity
	 * in the rpc rdma message).
	 */
	xid = *(uint32_t *)(uintptr_t)recvlist->c_saddr;
	/* Skip xid and set the xdr position accordingly. */
	XDR_SETPOS(xdrs, sizeof (uint32_t));
	(void) xdr_u_int(xdrs, &vers);
	(void) xdr_u_int(xdrs, &rdma_credit);
	(void) xdr_u_int(xdrs, &op);
	(void) xdr_do_clist(xdrs, &cl);
	/* Adopt the credit grant the server piggybacked on the reply. */
	clnt_update_credit(conn, rdma_credit);
	wlist_exists_reply = FALSE;
	if (! xdr_decode_wlist(xdrs, &rpcreply_wlist, &wlist_exists_reply)) {
		cmn_err(CE_NOTE,
		    "clnt_rdma_kcallit: xdr_decode_wlist failed");
		/* XXX: what should we fail with here -- EIO? */
	}
#ifdef RPC_RDMA_INLINE
	/* Inline build: reply data arrives inline, so clear wlist fields. */
	if (xdr_results == x_READ3vres) {
		((READ3vres *)resultsp)->wlist = NULL;
	} else if (xdr_results == x_READ3uiores) {
		((READ3uiores *)resultsp)->wlist = NULL;
	}
#endif

	if (procnum == NFSPROC3_READ) {

		/* Deregister the wlist we registered for this READ call. */
		check_dereg_wlist(conn, rpccall_wlist);

		/* Hand the server's wlist (actual bytes written) to NFS. */
		if (wlist_exists_reply) {
			if (xdr_results == x_READ3vres) {
				((READ3vres *)resultsp)->wlist =
				    rpcreply_wlist;
				((READ3vres *)resultsp)->wlist_len =
				    rpcreply_wlist->c_len;
			} else if (xdr_results == x_READ3uiores) {
				((READ3uiores *)resultsp)->wlist =
				    rpcreply_wlist;
				((READ3uiores *)resultsp)->wlist_len =
				    rpcreply_wlist->c_len;
			} else {
				cmn_err(CE_NOTE,
				    "unknown READ3 xdr decode fnp=%p",
				    (void *)xdr_results);
			}
		}
	} else {
		if (wlist_exists_reply)
			cmn_err(CE_NOTE,
			    "clnt_rdma_kcallit: received wlist for "
			    "non-READ3 call. reply xdr decode fnp=%p",
			    (void *)xdr_results);
	}

	/*
	 * The server shouldn't have sent a RDMA_SEND that
	 * the client needs to RDMA_WRITE a reply back to
	 * the server. So silently ignoring what the
	 * server returns in the rdma_reply section of the
	 * header.
	 */
	(void) xdr_decode_reply_wchunk(xdrs, &rdma_reply, conn);
	off = xdr_getpos(xdrs);

	/* Switch to the reply stream (inline or long-reply buffer). */
	xdrs = &replxdr;
	if (clnt_decode_long_reply(conn, procnum, &long_reply_buf_clist,
	    rdma_reply, xdrs, &rxdrp,
	    cl, recvlist, op, off) != CLNT_RDMA_SUCCESS)
	{
		goto done;
	}
	reply_msg.rm_direction = REPLY;
	reply_msg.rm_reply.rp_stat = MSG_ACCEPTED;
	reply_msg.acpted_rply.ar_stat = SUCCESS;
	reply_msg.acpted_rply.ar_verf = _null_auth;
	/*
	 * xdr_results will be done in AUTH_UNWRAP.
	 */
	reply_msg.acpted_rply.ar_results.where = NULL;
	reply_msg.acpted_rply.ar_results.proc = xdr_void;

	/*
	 * Decode and validate the response.
	 */
	if (xdr_replymsg(xdrs, &reply_msg)) {
		enum clnt_stat re_status;

		_seterr_reply(&reply_msg, &(p->cku_err));

		re_status = p->cku_err.re_status;
		if (re_status == RPC_SUCCESS) {
			/*
			 * Reply is good, check auth.
			 */
			if (!AUTH_VALIDATE(h->cl_auth,
			    &reply_msg.acpted_rply.ar_verf)) {
				p->cku_err.re_status = RPC_AUTHERROR;
				p->cku_err.re_why = AUTH_INVALIDRESP;
				RCSTAT_INCR(rcbadverfs);
				cmn_err(CE_WARN,
				    "clnt_rdma_kcallit: AUTH_VALIDATE failed");
			} else if (!AUTH_UNWRAP(h->cl_auth, xdrs,
			    xdr_results, resultsp)) {
				p->cku_err.re_status = RPC_CANTDECODERES;
				p->cku_err.re_errno = EIO;
				cmn_err(CE_WARN,
				    "clnt_rdma_kcallit: AUTH_UNWRAP failed");
			}
		} else {
			/* set errno in case we can't recover */
			if (re_status != RPC_VERSMISMATCH &&
			    re_status != RPC_AUTHERROR &&
			    re_status != RPC_PROGVERSMISMATCH)
				p->cku_err.re_errno = EIO;

			if (re_status == RPC_AUTHERROR) {
				/*
				 * Map recoverable and unrecoverable
				 * authentication errors to appropriate
				 * errno
				 */
				switch (p->cku_err.re_why) {
				case AUTH_BADCRED:
				case AUTH_BADVERF:
				case AUTH_INVALIDRESP:
				case AUTH_TOOWEAK:
				case AUTH_FAILED:
				case RPCSEC_GSS_NOCRED:
				case RPCSEC_GSS_FAILED:
					p->cku_err.re_errno = EACCES;
					break;
				case AUTH_REJECTEDCRED:
				case AUTH_REJECTEDVERF:
				default:
					p->cku_err.re_errno = EIO;
					break;
				}
				RPCLOG(1, "clnt_rdma_kcallit : "
				    "authentication failed with "
				    "RPC_AUTHERROR of type %d\n",
				    p->cku_err.re_why);
			}
			cmn_err(CE_WARN,
			    "clnt_rdma_kcallit: RPC failed");

		}
	} else {
		p->cku_err.re_status = RPC_CANTDECODERES;
		p->cku_err.re_errno = EIO;
		cmn_err(CE_WARN, "clnt_rdma_kcallit: xdr_replymsg failed");
	}

	/*
	 * If rpc reply is in a chunk, free it now.
	 */
done:
	if (long_reply_buf_exists) {
		(void) clist_deregister(conn, &long_reply_buf_clist, 0);
#ifndef SERVER_REG_CACHE
		kmem_free((void *)long_reply_buf_clist.c_daddr,
		    (size_t)long_reply_buf_clist.c_len);
#else
		RDMA_FREE_SERVER_CACHE_BUF(conn, (rib_lrc_entry_t *)long_reply_buf_clist.long_reply_buf);
#endif
	}
	if (cxdrp)
		XDR_DESTROY(cxdrp);
	if (rxdrp) {
		(void) xdr_rpc_free_verifier(rxdrp, &reply_msg);
		XDR_DESTROY(rxdrp);
	}

	if (recvlist) {
		/* Hand the receive buffer back to the transport. */
		rdma_buf_t recvmsg = {0};
		recvmsg.addr = (caddr_t)(uintptr_t)recvlist->c_saddr;
		recvmsg.type = RECV_BUFFER;
		RDMA_BUF_FREE(conn, &recvmsg);
		clist_free(recvlist);
	}
#if (!defined(ASYNC_CLIENT_DEREG))
	/*
	 * NOTE(review): this frees only the head clist element; a
	 * multi-segment uio wlist would presumably leak its tail
	 * entries — confirm against check_dereg_wlist()/clist_free().
	 */
	if (rpccall_wlist) {
		kmem_free(rpccall_wlist, sizeof (clist));
	}
#endif

	RDMA_REL_CONN(conn);
	if (p->cku_err.re_status != RPC_SUCCESS) {
		RCSTAT_INCR(rcbadcalls);
	}
	return (p->cku_err.re_status);
}
1174
/*
 * Set up the XDR stream that will be used to decode the RPC reply.
 *
 * Three cases, based on the header op and chunk lists already decoded:
 *  - RDMA_NOMSG with a long-reply buffer: the reply body was RDMA-written
 *    by the server into long_reply_buf_clist, so the inline (xdrmem)
 *    stream is destroyed and xdrs is rebuilt over that buffer.  Only
 *    READDIR/READDIRPLUS/READLINK are expected on this path; any other
 *    procedure is logged and *rxdrp is cleared.
 *  - RDMA_NOMSG with a read list (cl): not expected from a server; the
 *    event is logged and the inline stream destroyed.  NOTE(review):
 *    *rxdrp is NOT updated in this branch and xdrs has been destroyed --
 *    this appears to rely on the caller's own initialization of rxdrp;
 *    confirm the caller does not continue decoding with xdrs here.
 *  - Otherwise the reply is inline in the receive buffer: xdrs is rebuilt
 *    over recvlist at offset 'off' (just past the already-decoded header).
 *
 * On return, *rxdrp (when set) points at the stream the caller must later
 * destroy.  Always returns CLNT_RDMA_SUCCESS, even for the logged
 * unexpected cases -- the caller proceeds to xdr_replymsg() regardless.
 */
static int clnt_decode_long_reply(CONN *conn, rpcproc_t procnum,
		struct clist *long_reply_buf_clist,
		struct clist *rdma_reply, XDR *xdrs,
		XDR **rxdrp, struct clist *cl,
		struct clist *recvlist,
		uint_t op,uint_t off)
{
	if ( RDMA_NOMSG == op && long_reply_buf_clist->c_daddr) {
		if (procnum == NFSPROC3_READDIR ||
		    procnum == NFSPROC3_READDIRPLUS ||
		    procnum == NFSPROC3_READLINK) {
			/* Decode directly out of the RDMA-written buffer. */
			xdrmem_destroy(xdrs);
			xdrrdma_create(xdrs,
			    (caddr_t)long_reply_buf_clist->c_daddr,
			    rdma_reply->c_len,
			    0,
			    NULL,
			    XDR_DECODE,
			    conn);

			*rxdrp = xdrs;
		} else {
			/* Long reply for a procedure that should not use one. */
			cmn_err(CE_NOTE, "clnt_rdma_kcallit: "
			    "wchunk buffer for wrong nfs proc");
			xdrmem_destroy(xdrs);
			*rxdrp = NULL;
		}
	} else if (cl && RDMA_NOMSG == op) {
		/* A server must not send a READ list in a reply. */
		cmn_err(CE_NOTE, "clnt_rdma_kcallit: "
		    "Server sent a READ list in the RPC Reply");
		xdrmem_destroy(xdrs);
	} else {
		/* Inline reply: decode from the receive buffer past the header. */
		xdrmem_destroy(xdrs);
		xdrrdma_create(xdrs,
		    (caddr_t)(uintptr_t)(recvlist->c_saddr + off),
		    recvlist->c_len - off, 0, cl, XDR_DECODE, conn);
		*rxdrp = xdrs;
	}
	return CLNT_RDMA_SUCCESS;
}
1215
1216 #ifdef DYNAMIC_CREDIT_CONTROL
1217 static void clnt_compute_credit(CONN *conn, uint32_t *rdma_credit)
1218 {
1219 rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc;
1220
1221 mutex_enter(&conn->c_lock);
1222 if(cc_info->clnt_cc_granted_ops - cc_info->clnt_cc_in_flight_ops < CLNT_CREDIT_LOW)
1223 *rdma_credit = rdma_bufs_rqst + cc_info->clnt_cc_in_flight_ops / 2;
1224 mutex_exit(&conn->c_lock);
1225 }
1226 #endif
1227
1228 static void clnt_return_credit(CONN *conn)
1229 {
1230 rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc;
1231
1232 mutex_enter(&conn->c_lock);
1233 cc_info->clnt_cc_in_flight_ops--;
1234 cv_signal(&cc_info->clnt_cc_cv);
1235 mutex_exit(&conn->c_lock);
1236 }
1237
1238 static void clnt_update_credit(CONN *conn, uint32_t rdma_credit)
1239 {
1240 rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc;
1241
1242 /*
1243 * Get the granted number of buffers for credit control.
1244 */
1245 mutex_enter(&conn->c_lock);
1246 cc_info->clnt_cc_granted_ops = rdma_credit;
1247 mutex_exit(&conn->c_lock);
1248 }
1249
1250 static void clnt_check_credit(CONN *conn)
1251 {
1252 rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc;
1253
1254 /*
1255 * Make sure we are not going over our allowed buffer use
1256 * (and make sure we have gotten a granted value before).
1257 */
1258 mutex_enter(&conn->c_lock);
1259 while (cc_info->clnt_cc_in_flight_ops >= cc_info->clnt_cc_granted_ops
1260 && cc_info->clnt_cc_granted_ops != 0) {
1261 /*
1262 * Client has maxed out its granted buffers due to
1263 * credit control. Current handling is to block and wait.
1264 */
1265 cv_wait(&cc_info->clnt_cc_cv, &conn->c_lock);
1266 }
1267 cc_info->clnt_cc_in_flight_ops++;
1268 mutex_exit(&conn->c_lock);
1269 }
1270
/* ARGSUSED */
/*
 * Stub for the client abort operation: aborting an outstanding call is
 * not supported by this transport, so this is intentionally a no-op.
 */
static void
clnt_rdma_kabort(CLIENT *h)
{
}
1276
1277 static void
1278 clnt_rdma_kerror(CLIENT *h, struct rpc_err *err)
1279 {
1280 struct cku_private *p = htop(h);
1281
1282 *err = p->cku_err;
1283 }
1284
1285 static bool_t
1286 clnt_rdma_kfreeres(CLIENT *h, xdrproc_t xdr_res, caddr_t res_ptr)
1287 {
1288 struct cku_private *p = htop(h);
1289 XDR *xdrs;
1290
1291 xdrs = &(p->cku_outxdr);
1292 xdrs->x_op = XDR_FREE;
1293 return ((*xdr_res)(xdrs, res_ptr));
1294 }
1295
/* ARGSUSED */
/*
 * Control operation stub: no control commands are implemented for the
 * RDMA client transport; every request is reported successful.
 */
static bool_t
clnt_rdma_kcontrol(CLIENT *h, int cmd, char *arg)
{
	return (TRUE);
}
1302
/* ARGSUSED */
/*
 * Timer configuration stub: retransmit timers are not used by this
 * transport, so the call is merely counted in the rctimers statistic
 * and success (0) is returned.
 */
static int
clnt_rdma_ksettimers(CLIENT *h, struct rpc_timers *t, struct rpc_timers *all,
	int minimum, void(*feedback)(int, int, caddr_t), caddr_t arg,
	uint32_t xid)
{
	RCSTAT_INCR(rctimers);
	return (0);
}
1312
1313 int
1314 rdma_reachable(int addr_type, struct netbuf *addr, struct knetconfig **knconf)
1315 {
1316 rdma_registry_t *rp;
1317 void *handle = NULL;
1318 struct knetconfig *knc;
1319 char *pf, *p;
1320 rdma_stat status;
1321 int error = 0;
1322
1323 mutex_enter(&rdma_modload_lock);
1324 error = rdma_modload();
1325 mutex_exit(&rdma_modload_lock);
1326
1327 if (!INGLOBALZONE(curproc))
1328 return (-1);
1329 /*
1330 * modload the RDMA plugins if not already done.
1331 */
1332 if (!rdma_modloaded) {
1333 mutex_enter(&rdma_modload_lock);
1334 if (!rdma_modloaded) {
1335 error = rdma_modload();
1336 }
1337 mutex_exit(&rdma_modload_lock);
1338 if (error)
1339 return (-1);
1340 }
1341
1342 if (!rdma_dev_available)
1343 return (-1);
1344
1345 rw_enter(&rdma_lock, RW_READER);
1346 rp = rdma_mod_head;
1347 while (rp != NULL) {
1348 status = RDMA_REACHABLE(rp->r_mod->rdma_ops, addr_type, addr,
1349 &handle);
1350 if (status == RDMA_SUCCESS) {
1351 knc = kmem_zalloc(sizeof (struct knetconfig),
1352 KM_SLEEP);
1353 knc->knc_semantics = NC_TPI_RDMA;
1354 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
1355 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
1356 if (addr_type == AF_INET)
1357 (void) strncpy(pf, NC_INET, KNC_STRSIZE);
1358 else if (addr_type == AF_INET6)
1359 (void) strncpy(pf, NC_INET6, KNC_STRSIZE);
1360 pf[KNC_STRSIZE - 1] = '\0';
1361
1362 (void) strncpy(p, rp->r_mod->rdma_api, KNC_STRSIZE);
1363 p[KNC_STRSIZE - 1] = '\0';
1364
1365 knc->knc_protofmly = pf;
1366 knc->knc_proto = p;
1367 knc->knc_rdev = (dev_t)handle;
1368 *knconf = knc;
1369 rw_exit(&rdma_lock);
1370 return (0);
1371 }
1372 rp = rp->r_next;
1373 }
1374 rw_exit(&rdma_lock);
1375 return (-1);
1376 }
1377
1378 static void
1379 check_dereg_wlist(CONN *conn, clist *rwc)
1380 {
1381 if (rwc == NULL)
1382 return;
1383
1384 if (rwc) {
1385 if (rwc->c_dmemhandle.mrc_rmr && rwc->c_len) {
1386 int status;
1387 #if defined(ASYNC_CLIENT_DEREG)
1388 /* Add in an entry to rqueue */
1389 INSERT_QUEUE(conn, rwc);
1390 #else
1391 status = clist_deregister(conn, rwc, FALSE);
1392 if (status != RDMA_SUCCESS) {
1393 cmn_err(CE_NOTE, "dereg_wlist failed."
1394 "status=%d", status);
1395 }
1396 #endif
1397 }
1398
1399 }
1400 }