14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28 /*
29 * Portions of this source code were derived from Berkeley
30 * 4.3 BSD under license from the Regents of the University of
31 * California.
32 */
33
34 #pragma ident "@(#)clnt_rdma.c 1.10 05/07/26 SMI"
35
36 #include <sys/param.h>
37 #include <sys/types.h>
38 #include <sys/user.h>
39 #include <sys/systm.h>
40 #include <sys/sysmacros.h>
41 #include <sys/errno.h>
42 #include <sys/kmem.h>
43 #include <sys/debug.h>
45 #include <sys/kstat.h>
46 #include <sys/t_lock.h>
47 #include <sys/ddi.h>
48 #include <sys/cmn_err.h>
49 #include <sys/time.h>
50 #include <sys/isa_defs.h>
51 #include <sys/zone.h>
52
53 #include <rpc/types.h>
54 #include <rpc/xdr.h>
55 #include <rpc/auth.h>
56 #include <rpc/clnt.h>
57 #include <rpc/rpc_msg.h>
58 #include <rpc/rpc_rdma.h>
59
60
61 static enum clnt_stat clnt_rdma_kcallit(CLIENT *, rpcproc_t, xdrproc_t,
62 caddr_t, xdrproc_t, caddr_t, struct timeval);
63 static void clnt_rdma_kabort(CLIENT *);
64 static void clnt_rdma_kerror(CLIENT *, struct rpc_err *);
65 static bool_t clnt_rdma_kfreeres(CLIENT *, xdrproc_t, caddr_t);
66 static void clnt_rdma_kdestroy(CLIENT *);
67 static bool_t clnt_rdma_kcontrol(CLIENT *, int, char *);
68 static int clnt_rdma_ksettimers(CLIENT *, struct rpc_timers *,
69 struct rpc_timers *, int, void(*)(int, int, caddr_t), caddr_t, uint32_t);
70
71 /*
72 * Operations vector for RDMA based RPC
73 */
74 static struct clnt_ops rdma_clnt_ops = {
75 clnt_rdma_kcallit, /* do rpc call */
76 clnt_rdma_kabort, /* abort call */
77 clnt_rdma_kerror, /* return error status */
78 clnt_rdma_kfreeres, /* free results */
79 clnt_rdma_kdestroy, /* destroy rpc handle */
80 clnt_rdma_kcontrol, /* the ioctl() of rpc */
81 clnt_rdma_ksettimers, /* set retry timers */
82 };
83
84 /*
85 * The size of the preserialized RPC header information.
86 */
87 #define CKU_HDRSIZE 20
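/*
 * The 20 bytes appear to cover the five fixed 32-bit words at the front of
 * an RPC call header: xid, direction (CALL), RPC version, program and
 * version.  The procedure number and credentials are serialized dynamically
 * in clnt_rdma_kcallit(); cku_rpchdr[] reserves CKU_HDRSIZE + 4 bytes so the
 * procedure number can also be staged there for the RPCSEC_GSS path.
 */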
88
89 /*
90 * Per RPC RDMA endpoint details
91 */
92 typedef struct cku_private {
93 CLIENT cku_client; /* client handle */
94 rdma_mod_t *cku_rd_mod; /* underlying RDMA mod */
95 void *cku_rd_handle; /* underlying RDMA device */
96 struct netbuf cku_addr; /* remote netbuf address */
97 int cku_addrfmly; /* for finding addr_type */
98 struct rpc_err cku_err; /* error status */
99 struct cred *cku_cred; /* credentials */
100 XDR cku_outxdr; /* xdr stream for output */
101 uint32_t cku_outsz;
102 XDR cku_inxdr; /* xdr stream for input */
103 char cku_rpchdr[CKU_HDRSIZE+4]; /* rpc header */
104 uint32_t cku_xid; /* current XID */
105 } cku_private_t;
106
107 #define CLNT_RDMA_DELAY 10 /* secs to delay after a connection failure */
268 rw_exit(&rdma_lock);
269
270 /*
271 * Set up the rpc information
272 */
273 p->cku_cred = cred;
274 p->cku_xid = 0;
275
276 if (p->cku_addr.maxlen < raddr->len) {
277 if (p->cku_addr.maxlen != 0 && p->cku_addr.buf != NULL)
278 kmem_free(p->cku_addr.buf, p->cku_addr.maxlen);
279 p->cku_addr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP);
280 p->cku_addr.maxlen = raddr->maxlen;
281 }
282
283 p->cku_addr.len = raddr->len;
284 bcopy(raddr->buf, p->cku_addr.buf, raddr->len);
285 h->cl_ops = &rdma_clnt_ops;
286 }
287
288 /* ARGSUSED */
289 static enum clnt_stat
290 clnt_rdma_kcallit(CLIENT *h, rpcproc_t procnum, xdrproc_t xdr_args,
291 caddr_t argsp, xdrproc_t xdr_results, caddr_t resultsp, struct timeval wait)
292 {
293 cku_private_t *p = htop(h);
294 int status;
295 XDR *xdrs;
296 XDR *cxdrp = NULL, callxdr; /* for xdrrdma encoding the RPC call */
297 XDR *rxdrp = NULL, replxdr; /* for xdrrdma decoding the RPC reply */
298 struct rpc_msg reply_msg;
299 struct clist *sendlist, *recvlist = NULL;
300 struct clist *cl = NULL, *cle = NULL;
301 uint_t vers, op;
302 uint_t off;
303 uint32_t xid;
304 CONN *conn = NULL;
305 rdma_buf_t clmsg, rpcmsg, longmsg, rpcreply;
306 int msglen;
307 clock_t ticks;
308
309 RCSTAT_INCR(rccalls);
310 /*
311 * Get unique xid
312 */
313 if (p->cku_xid == 0)
314 p->cku_xid = alloc_xid();
315
316 status = RDMA_GET_CONN(p->cku_rd_mod->rdma_ops, &p->cku_addr,
317 p->cku_addrfmly, p->cku_rd_handle, &conn);
318
319 if (conn == NULL) {
320 /*
321 * Connection to the server failed. Could be because of one
322 * of several things. In some cases we don't want
323 * the caller to retry immediately - delay before
324 * returning to caller.
325 */
326 switch (status) {
327 case RDMA_TIMEDOUT:
328 /*
346 * down or temporary resource failure. Delay before
347 * returning to caller.
348 */
349 ticks = clnt_rdma_min_delay * drv_usectohz(1000000);
350 p->cku_err.re_status = RPC_CANTCONNECT;
351 p->cku_err.re_errno = EIO;
352
353 if (h->cl_nosignal == TRUE) {
354 delay(ticks);
355 } else {
356 if (delay_sig(ticks) == EINTR) {
357 p->cku_err.re_status = RPC_INTR;
358 p->cku_err.re_errno = EINTR;
359 }
360 }
361 break;
362 }
363
364 return (p->cku_err.re_status);
365 }
366 /*
367 * Get the size of the rpc call message. Need this
368 * to determine if the rpc call message will fit in
369 * the pre-allocated RDMA buffers. If the rpc call
370 * message length is greater than the pre-allocated
371 * buffers, then it is a Long RPC. A one-time-use
372 * buffer is allocated and registered for the Long
373 * RPC call.
374 */
375 xdrs = &callxdr;
376 msglen = CKU_HDRSIZE + BYTES_PER_XDR_UNIT;
377 if (h->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
378 msglen += xdrrdma_authsize(h->cl_auth, p->cku_cred,
379 rdma_minchunk);
380 msglen += xdrrdma_sizeof(xdr_args, argsp, rdma_minchunk);
381
382 if (msglen > RPC_MSG_SZ) {
383
384 /*
385 * Long RPC. Allocate one time use custom buffer.
386 */
387 rpcmsg.type = CHUNK_BUFFER;
388 rpcmsg.addr = kmem_zalloc(msglen, KM_SLEEP);
389 cle = kmem_zalloc(sizeof (*cle), KM_SLEEP);
390 cle->c_xdroff = 0;
391 cle->c_len = rpcmsg.len = msglen;
392 cle->c_saddr = (uint64)(uintptr_t)rpcmsg.addr;
393 cle->c_next = NULL;
394 xdrrdma_create(xdrs, rpcmsg.addr, msglen,
395 rdma_minchunk, cle, XDR_ENCODE, NULL);
396 cxdrp = xdrs;
397 op = RDMA_NOMSG;
398 } else {
399 /*
400 * Get a pre-allocated buffer for rpc call
401 */
402 rpcmsg.type = SEND_BUFFER;
403 if (RDMA_BUF_ALLOC(conn, &rpcmsg)) {
404 p->cku_err.re_status = RPC_CANTSEND;
405 p->cku_err.re_errno = EIO;
406 RCSTAT_INCR(rcnomem);
407 cmn_err(CE_WARN,
408 "clnt_rdma_kcallit: no buffers!");
409 goto done;
410 }
411 xdrrdma_create(xdrs, rpcmsg.addr, rpcmsg.len,
412 rdma_minchunk, NULL, XDR_ENCODE, NULL);
413 cxdrp = xdrs;
414 op = RDMA_MSG;
415 }
416 } else {
417 /*
418 * For RPCSEC_GSS since we cannot accurately presize the
419 * buffer required for encoding, we assume that it is going
420 * to be a Long RPC to start with. We also create the
421 * XDR stream with min_chunk set to 0, which instructs
422 * the XDR layer to not chunk the incoming byte stream.
423 */
424
425 msglen += 2 * MAX_AUTH_BYTES + 2 * sizeof (struct opaque_auth);
426 msglen += xdr_sizeof(xdr_args, argsp);
427
428 /*
429 * Long RPC. Allocate one time use custom buffer.
430 */
431 longmsg.type = CHUNK_BUFFER;
432 longmsg.addr = kmem_zalloc(msglen, KM_SLEEP);
433 cle = kmem_zalloc(sizeof (*cle), KM_SLEEP);
434 cle->c_xdroff = 0;
435 cle->c_len = longmsg.len = msglen;
436 cle->c_saddr = (uint64)(uintptr_t)longmsg.addr;
437 cle->c_next = NULL;
438 xdrrdma_create(xdrs, longmsg.addr, msglen, 0, cle,
439 XDR_ENCODE, NULL);
440 cxdrp = xdrs;
441 op = RDMA_NOMSG;
442 }
443
444 if (h->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
445 /*
446 * Copy in the preserialized RPC header
447 * information.
448 */
449 bcopy(p->cku_rpchdr, rpcmsg.addr, CKU_HDRSIZE);
450
451 /*
452 * transaction id is the 1st thing in the output
453 * buffer.
454 */
455 /* LINTED pointer alignment */
456 (*(uint32_t *)(rpcmsg.addr)) = p->cku_xid;
457
458 /* Skip the preserialized stuff. */
459 XDR_SETPOS(xdrs, CKU_HDRSIZE);
460
461 /* Serialize dynamic stuff into the output buffer. */
462 if ((!XDR_PUTINT32(xdrs, (int32_t *)&procnum)) ||
463 (!AUTH_MARSHALL(h->cl_auth, xdrs, p->cku_cred)) ||
464 (!(*xdr_args)(xdrs, argsp))) {
465 rdma_buf_free(conn, &rpcmsg);
466 if (cle)
467 clist_free(cle);
468 p->cku_err.re_status = RPC_CANTENCODEARGS;
469 p->cku_err.re_errno = EIO;
470 cmn_err(CE_WARN,
471 "clnt_rdma_kcallit: XDR_PUTINT32/AUTH_MARSHALL/xdr_args failed");
472 goto done;
473 }
474 p->cku_outsz = XDR_GETPOS(xdrs);
475 } else {
476 uint32_t *uproc = (uint32_t *)&p->cku_rpchdr[CKU_HDRSIZE];
477 IXDR_PUT_U_INT32(uproc, procnum);
478 (*(uint32_t *)(&p->cku_rpchdr[0])) = p->cku_xid;
479 XDR_SETPOS(xdrs, 0);
480
481 /* Serialize the procedure number and the arguments. */
482 if (!AUTH_WRAP(h->cl_auth, (caddr_t)p->cku_rpchdr,
483 CKU_HDRSIZE+4, xdrs, xdr_args, argsp)) {
484 if (longmsg.addr != xdrs->x_base) {
485 longmsg.addr = xdrs->x_base;
486 longmsg.len = xdr_getbufsize(xdrs);
487 }
488 rdma_buf_free(conn, &longmsg);
489 clist_free(cle);
490 p->cku_err.re_status = RPC_CANTENCODEARGS;
491 p->cku_err.re_errno = EIO;
492 cmn_err(CE_WARN,
493 "clnt_rdma_kcallit: AUTH_WRAP failed");
494 goto done;
495 }
496 /*
497 * If we had to allocate a new buffer while encoding
498 * then update the addr and len.
499 */
500 if (longmsg.addr != xdrs->x_base) {
501 longmsg.addr = xdrs->x_base;
502 longmsg.len = xdr_getbufsize(xdrs);
503 }
504
505 /*
506 * If the encoded message turns out not to be long enough
507 * to be a Long RPC after all, allocate a
508 * SEND_BUFFER and copy the encoded message into it.
509 */
510 p->cku_outsz = XDR_GETPOS(xdrs);
511 if (p->cku_outsz > RPC_MSG_SZ) {
512 rpcmsg.type = CHUNK_BUFFER;
513 rpcmsg.addr = longmsg.addr;
514 rpcmsg.len = longmsg.len;
515 } else {
516 clist_free(cle);
517 XDR_DESTROY(cxdrp);
518 cxdrp = NULL;
519 /*
520 * Get a pre-allocated buffer for rpc call
521 */
522 rpcmsg.type = SEND_BUFFER;
523 if (RDMA_BUF_ALLOC(conn, &rpcmsg)) {
524 p->cku_err.re_status = RPC_CANTSEND;
525 p->cku_err.re_errno = EIO;
526 RCSTAT_INCR(rcnomem);
527 cmn_err(CE_WARN,
528 "clnt_rdma_kcallit: no buffers!");
529 rdma_buf_free(conn, &longmsg);
530 goto done;
531 }
532 bcopy(longmsg.addr, rpcmsg.addr, p->cku_outsz);
533 xdrrdma_create(xdrs, rpcmsg.addr, p->cku_outsz, 0,
534 NULL, XDR_ENCODE, NULL);
535 cxdrp = xdrs;
536 rdma_buf_free(conn, &longmsg);
537 op = RDMA_MSG;
538 }
539 }
540
541 cl = xdrrdma_clist(xdrs);
542
543 /*
544 * Update the chunk size information for the Long RPC msg.
545 */
546 if (cl && op == RDMA_NOMSG)
547 cl->c_len = p->cku_outsz;
548
549 /*
550 * Set up the RDMA chunk message
551 */
552 vers = RPCRDMA_VERS;
553 clmsg.type = SEND_BUFFER;
554 if (RDMA_BUF_ALLOC(conn, &clmsg)) {
555 p->cku_err.re_status = RPC_CANTSEND;
556 p->cku_err.re_errno = EIO;
557 rdma_buf_free(conn, &rpcmsg);
558 RCSTAT_INCR(rcnomem);
559 cmn_err(CE_WARN, "clnt_rdma_kcallit: no free buffers!!");
560 goto done;
561 }
562 xdrs = &p->cku_outxdr;
563 xdrmem_create(xdrs, clmsg.addr, clmsg.len, XDR_ENCODE);
564 /*
565 * Treat xid as opaque (xid is the first entity
566 * in the rpc rdma message).
567 */
568 (*(uint32_t *)clmsg.addr) = p->cku_xid;
569 /* Skip xid and set the xdr position accordingly. */
570 XDR_SETPOS(xdrs, sizeof (uint32_t));
571 (void) xdr_u_int(xdrs, &vers);
572 (void) xdr_u_int(xdrs, &op);
573
574 /*
575 * Now XDR the chunk list
576 */
577 if (cl != NULL) {
578
579 /*
580 * Register the chunks in the list
581 */
582 status = clist_register(conn, cl, 1);
583 if (status != RDMA_SUCCESS) {
584 cmn_err(CE_WARN,
585 "clnt_rdma_kcallit: clist register failed");
586 rdma_buf_free(conn, &clmsg);
587 rdma_buf_free(conn, &rpcmsg);
588 clist_free(cl);
589 p->cku_err.re_status = RPC_CANTSEND;
590 p->cku_err.re_errno = EIO;
591 goto done;
592 }
593
594 }
595 (void) xdr_do_clist(xdrs, &cl);
596
597 /*
598 * Start with the RDMA header and clist (if any)
599 */
600 sendlist = NULL;
601 clist_add(&sendlist, 0, XDR_GETPOS(xdrs), &clmsg.handle,
602 clmsg.addr, NULL, NULL);
603
604 /*
605 * Put the RPC call message in the send list if small RPC
606 */
607 if (op == RDMA_MSG) {
608 clist_add(&sendlist, 0, p->cku_outsz, &rpcmsg.handle,
609 rpcmsg.addr, NULL, NULL);
610 } else {
611 /* Long RPC already in chunk list */
612 RCSTAT_INCR(rclongrpcs);
613 }
614
615 /*
616 * Set up a reply buffer ready for the reply
617 */
618 status = rdma_clnt_postrecv(conn, p->cku_xid);
619 if (status != RDMA_SUCCESS) {
620 rdma_buf_free(conn, &clmsg);
621 rdma_buf_free(conn, &rpcmsg);
622 if (cl) {
623 (void) clist_deregister(conn, cl, 1);
631 /*
632 * sync the memory for dma
633 */
634 if (cl != NULL) {
635 status = clist_syncmem(conn, cl, 1);
636 if (status != RDMA_SUCCESS) {
637 rdma_buf_free(conn, &clmsg);
638 rdma_buf_free(conn, &rpcmsg);
639 (void) clist_deregister(conn, cl, 1);
640 clist_free(cl);
641 clist_free(sendlist);
642 p->cku_err.re_status = RPC_CANTSEND;
643 p->cku_err.re_errno = EIO;
644 goto done;
645 }
646 }
647
648 /*
649 * Send the call message to the server
650 */
651 status = RDMA_SEND(conn, sendlist, p->cku_xid);
652 if (status != RDMA_SUCCESS) {
653 if (cl) {
654 (void) clist_deregister(conn, cl, 1);
655 clist_free(cl);
656 /*
657 * If this was a long RPC message, need
658 * to free that buffer.
659 */
660 if (rpcmsg.type == CHUNK_BUFFER)
661 rdma_buf_free(conn, &rpcmsg);
662 }
663 clist_free(sendlist);
664 p->cku_err.re_status = RPC_CANTSEND;
665 p->cku_err.re_errno = EIO;
666 goto done;
667 } else {
668 /*
669 * RDMA plugin now owns the send msg buffers.
670 * Clear them out and don't free them here.
671 */
672 clmsg.addr = NULL;
673 if (rpcmsg.type == SEND_BUFFER)
674 rpcmsg.addr = NULL;
675 }
676 clist_free(sendlist);
677 #ifdef DEBUG
678 if (rdma_clnt_debug) {
679 printf("clnt_rdma_kcallit: send request xid %u\n", p->cku_xid);
680 }
681 #endif
682
683 /*
684 * Recv rpc reply
685 */
686 status = RDMA_RECV(conn, &recvlist, p->cku_xid);
687
688 /*
689 * Deregister chunks sent. Do this only after the reply
690 * is received as that is a sure indication that the
691 * remote end has completed RDMA of the chunks.
692 */
693 if (cl != NULL) {
694 /*
695 * Deregister the chunks
696 */
697 (void) clist_deregister(conn, cl, 1);
698 clist_free(cl);
699 /*
700 * If long RPC free chunk
701 */
702 rdma_buf_free(conn, &rpcmsg);
703 }
704
705 /*
706 * Now check recv status
707 */
708 if (status != 0) {
709 #ifdef DEBUG
710 if (rdma_clnt_debug)
711 cmn_err(CE_NOTE,
712 "clnt_rdma_kcallit: reply failed %u status %d",
713 p->cku_xid, status);
714 #endif
715 if (status == RDMA_INTR) {
716 p->cku_err.re_status = RPC_INTR;
717 p->cku_err.re_errno = EINTR;
718 RCSTAT_INCR(rcintrs);
719 } else if (status == RPC_TIMEDOUT) {
720 p->cku_err.re_status = RPC_TIMEDOUT;
721 p->cku_err.re_errno = ETIMEDOUT;
722 RCSTAT_INCR(rctimeouts);
723 } else {
724 p->cku_err.re_status = RPC_CANTRECV;
725 p->cku_err.re_errno = EIO;
726 }
727 goto done;
728 }
729 #ifdef DEBUG
730 if (rdma_clnt_debug)
731 printf("clnt_rdma_kcallit: got response xid %u\n", p->cku_xid);
732 #endif
733 /*
734 * Process the reply message.
735 *
736 * First the chunk list (if any)
737 */
738 xdrs = &(p->cku_inxdr);
739 xdrmem_create(xdrs, (caddr_t)(uintptr_t)recvlist->c_saddr,
740 recvlist->c_len, XDR_DECODE);
741 /*
742 * Treat xid as opaque (xid is the first entity
743 * in the rpc rdma message).
744 */
745 xid = *(uint32_t *)(uintptr_t)recvlist->c_saddr;
746 /* Skip xid and set the xdr position accordingly. */
747 XDR_SETPOS(xdrs, sizeof (uint32_t));
748 (void) xdr_u_int(xdrs, &vers);
749 (void) xdr_u_int(xdrs, &op);
750 (void) xdr_do_clist(xdrs, &cl);
751 off = xdr_getpos(xdrs);
752
753 /*
754 * Now the RPC reply message itself. If the reply
755 * came as a chunk item, then RDMA the reply over.
756 */
757 xdrs = &replxdr;
758 if (cl && op == RDMA_NOMSG) {
759 struct clist *cle = cl;
760
761 rpcreply.type = CHUNK_BUFFER;
762 rpcreply.addr = kmem_alloc(cle->c_len, KM_SLEEP);
763 rpcreply.len = cle->c_len;
764 cle->c_daddr = (uint64)(uintptr_t)rpcreply.addr;
765 cl = cl->c_next;
766 cle->c_next = NULL;
767
768 /*
769 * Register the rpc reply chunk destination
770 */
771 status = clist_register(conn, cle, 0);
772 if (status) {
773 rdma_buf_free(conn, &rpcreply);
774 clist_free(cle);
775 p->cku_err.re_status = RPC_CANTDECODERES;
776 p->cku_err.re_errno = EIO;
777 cmn_err(CE_WARN,
778 "clnt_rdma_kcallit: clist_register failed");
779 goto rdma_done;
780 }
781
782 /*
783 * Now read rpc reply in
784 */
785 #ifdef DEBUG
786 if (rdma_clnt_debug)
787 printf("clnt_rdma_kcallit: read chunk, len %d, xid %u, "
788 "reply xid %u\n", cle->c_len, p->cku_xid, xid);
789 #endif
790 status = RDMA_READ(conn, cle, WAIT);
791 if (status) {
792 (void) clist_deregister(conn, cle, 0);
793 rdma_buf_free(conn, &rpcreply);
794 clist_free(cle);
795 p->cku_err.re_status = RPC_CANTDECODERES;
796 p->cku_err.re_errno = EIO;
797 cmn_err(CE_WARN,
798 "clnt_rdma_kcallit: RDMA_READ failed");
799 goto rdma_done;
800 }
801
802 /*
803 * sync the memory for dma
804 */
805 status = clist_syncmem(conn, cle, 0);
806 if (status != RDMA_SUCCESS) {
807 (void) clist_deregister(conn, cle, 0);
808 rdma_buf_free(conn, &rpcreply);
809 clist_free(cle);
810 p->cku_err.re_status = RPC_CANTDECODERES;
811 p->cku_err.re_errno = EIO;
812 goto rdma_done;
813 }
814
815 /*
816 * Deregister the Long RPC chunk
817 */
818 (void) clist_deregister(conn, cle, 0);
819 clist_free(cle);
820 xdrrdma_create(xdrs, rpcreply.addr, rpcreply.len, 0, cl,
821 XDR_DECODE, conn);
822 rxdrp = xdrs;
823 } else {
824 rpcreply.addr = NULL;
825 xdrrdma_create(xdrs,
826 (caddr_t)(uintptr_t)(recvlist->c_saddr + off),
827 recvlist->c_len - off, 0, cl, XDR_DECODE, conn);
828 rxdrp = xdrs;
829 }
830
831 reply_msg.rm_direction = REPLY;
832 reply_msg.rm_reply.rp_stat = MSG_ACCEPTED;
833 reply_msg.acpted_rply.ar_stat = SUCCESS;
834 reply_msg.acpted_rply.ar_verf = _null_auth;
835 /*
836 * xdr_results will be done in AUTH_UNWRAP.
837 */
838 reply_msg.acpted_rply.ar_results.where = NULL;
839 reply_msg.acpted_rply.ar_results.proc = xdr_void;
840
841 /*
842 * Decode and validate the response.
843 */
844 if (xdr_replymsg(xdrs, &reply_msg)) {
845 enum clnt_stat re_status;
846
847 _seterr_reply(&reply_msg, &(p->cku_err));
848
849 re_status = p->cku_err.re_status;
850 if (re_status == RPC_SUCCESS) {
895 break;
896 }
897 RPCLOG(1, "clnt_rdma_kcallit : "
898 "authentication failed with "
899 "RPC_AUTHERROR of type %d\n",
900 p->cku_err.re_why);
901 }
902 cmn_err(CE_WARN,
903 "clnt_rdma_kcallit: RPC failed");
904
905 }
906 } else {
907 p->cku_err.re_status = RPC_CANTDECODERES;
908 p->cku_err.re_errno = EIO;
909 cmn_err(CE_WARN, "clnt_rdma_kcallit: xdr_replymsg failed");
910 }
911
912 /*
913 * If rpc reply is in a chunk, free it now.
914 */
915 if (rpcreply.addr != NULL)
916 rdma_buf_free(conn, &rpcreply);
917
918 rdma_done:
919 if ((cl != NULL) || (op == RDMA_NOMSG)) {
920 rdma_buf_t donemsg;
921
922 /*
923 * Free the list holding the chunk info
924 */
925 if (cl) {
926 clist_free(cl);
927 cl = NULL;
928 }
929
930 /*
931 * Tell the server that the reads are done
932 */
933 donemsg.type = SEND_BUFFER;
934 if (RDMA_BUF_ALLOC(conn, &donemsg)) {
935 p->cku_err.re_status = RPC_CANTSEND;
936 p->cku_err.re_errno = EIO;
937 RCSTAT_INCR(rcnomem);
938 cmn_err(CE_WARN, "clnt_rdma_kcallit: no free buffer");
939 goto done;
940 }
941 xdrs = &p->cku_outxdr;
942 xdrmem_create(xdrs, donemsg.addr, donemsg.len, XDR_ENCODE);
943 vers = RPCRDMA_VERS;
944 op = RDMA_DONE;
945
946 /*
947 * Treat xid as opaque (xid is the first entity
948 * in the rpc rdma message).
949 */
950 (*(uint32_t *)donemsg.addr) = p->cku_xid;
951 /* Skip xid and set the xdr position accordingly. */
952 XDR_SETPOS(xdrs, sizeof (uint32_t));
953 if (!xdr_u_int(xdrs, &vers) ||
954 !xdr_u_int(xdrs, &op)) {
955 cmn_err(CE_WARN,
956 "clnt_rdma_kcallit: xdr_u_int failed");
957 rdma_buf_free(conn, &donemsg);
958 goto done;
959 }
960
961 sendlist = NULL;
962 clist_add(&sendlist, 0, XDR_GETPOS(xdrs), &donemsg.handle,
963 donemsg.addr, NULL, NULL);
964
965 status = RDMA_SEND(conn, sendlist, p->cku_xid);
966 if (status != RDMA_SUCCESS) {
967 cmn_err(CE_WARN,
968 "clnt_rdma_kcallit: RDMA_SEND failed xid %u",
969 p->cku_xid);
970 }
971 #ifdef DEBUG
972 else {
973 if (rdma_clnt_debug)
974 printf("clnt_rdma_kcallit: sent RDMA_DONE xid %u\n",
975 p->cku_xid);
976 }
977 #endif
978 clist_free(sendlist);
979 }
980
981 done:
982 if (cxdrp)
983 XDR_DESTROY(cxdrp);
984 if (rxdrp) {
985 (void) xdr_rpc_free_verifier(rxdrp, &reply_msg);
986 XDR_DESTROY(rxdrp);
987 }
988
989 if (recvlist) {
990 rdma_buf_t recvmsg;
991
992 recvmsg.addr = (caddr_t)(uintptr_t)recvlist->c_saddr;
993 recvmsg.type = RECV_BUFFER;
994 RDMA_BUF_FREE(conn, &recvmsg);
995 clist_free(recvlist);
996 }
997 RDMA_REL_CONN(conn);
998 if (p->cku_err.re_status != RPC_SUCCESS) {
999 RCSTAT_INCR(rcbadcalls);
1000 }
1001 return (p->cku_err.re_status);
1002 }
1003
1004 /* ARGSUSED */
1005 static void
1006 clnt_rdma_kabort(CLIENT *h)
1007 {
1008 }
1009
1010 static void
1011 clnt_rdma_kerror(CLIENT *h, struct rpc_err *err)
1012 {
1013 struct cku_private *p = htop(h);
1014
1015 *err = p->cku_err;
1016 }
1017
1018 static bool_t
1019 clnt_rdma_kfreeres(CLIENT *h, xdrproc_t xdr_res, caddr_t res_ptr)
1020 {
1021 struct cku_private *p = htop(h);
1022 XDR *xdrs;
1023
1036 /* ARGSUSED */
1037 static int
1038 clnt_rdma_ksettimers(CLIENT *h, struct rpc_timers *t, struct rpc_timers *all,
1039 int minimum, void(*feedback)(int, int, caddr_t), caddr_t arg,
1040 uint32_t xid)
1041 {
1042 RCSTAT_INCR(rctimers);
1043 return (0);
1044 }
1045
1046 int
1047 rdma_reachable(int addr_type, struct netbuf *addr, struct knetconfig **knconf)
1048 {
1049 rdma_registry_t *rp;
1050 void *handle = NULL;
1051 struct knetconfig *knc;
1052 char *pf, *p;
1053 rdma_stat status;
1054 int error = 0;
1055
1056 if (!INGLOBALZONE(curproc))
1057 return (-1);
1058 /*
1059 * modload the RDMA plugins if not already done.
1060 */
1061 if (!rdma_modloaded) {
1062 mutex_enter(&rdma_modload_lock);
1063 if (!rdma_modloaded) {
1064 error = rdma_modload();
1065 }
1066 mutex_exit(&rdma_modload_lock);
1067 if (error)
1068 return (-1);
1069 }
1070
1071 if (!rdma_dev_available)
1072 return (-1);
1073
1074 rw_enter(&rdma_lock, RW_READER);
1075 rp = rdma_mod_head;
1085 if (addr_type == AF_INET)
1086 (void) strncpy(pf, NC_INET, KNC_STRSIZE);
1087 else if (addr_type == AF_INET6)
1088 (void) strncpy(pf, NC_INET6, KNC_STRSIZE);
1089 pf[KNC_STRSIZE - 1] = '\0';
1090
1091 (void) strncpy(p, rp->r_mod->rdma_api, KNC_STRSIZE);
1092 p[KNC_STRSIZE - 1] = '\0';
1093
1094 knc->knc_protofmly = pf;
1095 knc->knc_proto = p;
1096 knc->knc_rdev = (dev_t)handle;
1097 *knconf = knc;
1098 rw_exit(&rdma_lock);
1099 return (0);
1100 }
1101 rp = rp->r_next;
1102 }
1103 rw_exit(&rdma_lock);
1104 return (-1);
1105 }
|
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28 /*
29 * Portions of this source code were derived from Berkeley
30 * 4.3 BSD under license from the Regents of the University of
31 * California.
32 */
33
34 /* Copyright (c) 2006, The Ohio State University. All rights reserved.
35 *
36 * Portions of this source code were developed by the team members of
37 * The Ohio State University's Network-Based Computing Laboratory (NBCL),
38 * headed by Professor Dhabaleswar K. (DK) Panda.
39 *
40 * Acknowledgements for contributions from developers:
41 * Ranjit Noronha: noronha@cse.ohio-state.edu
42 * Lei Chai : chail@cse.ohio-state.edu
43 * Weikuan Yu : yuw@cse.ohio-state.edu
44 *
45 */
46
47 #pragma ident "@(#)clnt_rdma.c 1.10 05/07/26 SMI"
48
49 #include <sys/param.h>
50 #include <sys/types.h>
51 #include <sys/user.h>
52 #include <sys/systm.h>
53 #include <sys/sysmacros.h>
54 #include <sys/errno.h>
55 #include <sys/kmem.h>
56 #include <sys/debug.h>
58 #include <sys/kstat.h>
59 #include <sys/t_lock.h>
60 #include <sys/ddi.h>
61 #include <sys/cmn_err.h>
62 #include <sys/time.h>
63 #include <sys/isa_defs.h>
64 #include <sys/zone.h>
65
66 #include <rpc/types.h>
67 #include <rpc/xdr.h>
68 #include <rpc/auth.h>
69 #include <rpc/clnt.h>
70 #include <rpc/rpc_msg.h>
71 #include <rpc/rpc_rdma.h>
72 #include <nfs/nfs.h>
73
74 #define CLNT_CREDIT_LOW (5)
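/*
 * CLNT_CREDIT_LOW is only consulted when DYNAMIC_CREDIT_CONTROL is defined:
 * clnt_compute_credit() treats it as a low-water mark and advertises a
 * larger credit request once fewer than this many granted-but-unused
 * operations remain on the connection.
 */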
75
76 xdrproc_t x_READ3args = NULL_xdrproc_t;
77 xdrproc_t x_READ3res = NULL_xdrproc_t;
78 xdrproc_t x_READ3vres = NULL_xdrproc_t;
79 xdrproc_t x_READ3uiores = NULL_xdrproc_t;
80
81 static uint32_t rdma_bufs_rqst = RDMA_BUFS_RQST;
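/*
 * Default number of credits (receive buffers) requested from the server in
 * the RPC/RDMA header of every call; see clnt_compose_rdma_header().
 */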
82
83 int rdma_wlist_verbose_debug = 0;
84 int rdma_wlist_memreg_debug = 0;
85 int rdma_wlist_clnt_debug = 0;
86 int rdma_wlist_svc_debug = 0;
87 int rdma_wlist_xdr_debug = 0;
88 int rdma_wlist_pglck_debug = 0;
89 int credit_control_debug = 0;
90 int rdma_long_reply_debug = 0;
91 int rdma_xdr_long_reply_debug = 0;
92
93 struct clist empty_cl = {0};
94
95 static void clnt_read3args_make_wlist(caddr_t, struct clist **, xdrproc_t, uint_t *);
96 static int clnt_compose_rpcmsg(CLIENT *, rpcproc_t, rdma_buf_t *,
97 XDR *, xdrproc_t, caddr_t);
98 static int clnt_compose_rdma_header(CONN *, CLIENT *, rdma_buf_t *,
99 XDR **, uint_t *);
100 static int clnt_setup_rlist(CONN *, XDR *, struct clist **);
101 static int clnt_setup_wlist(CONN *, rpcproc_t, struct clist **,
102 caddr_t, xdrproc_t, XDR *);
103 static int clnt_setup_long_reply(CONN *, rpcproc_t, struct clist *,
104 XDR *, bool_t *);
105 #ifdef DYNAMIC_CREDIT_CONTROL
106 static void clnt_compute_credit(CONN *, uint32_t *);
107 #endif
108 static void clnt_check_credit(CONN *);
109 static void clnt_return_credit(CONN *);
110 static int clnt_decode_long_reply(CONN *, rpcproc_t, struct clist *,
111 struct clist *, XDR *, XDR **, struct clist *,
112 struct clist *, uint_t, uint_t);
113
114 static void clnt_update_credit(CONN *, uint32_t);
115 static void check_dereg_wlist(CONN *, struct clist *);
116
117 static enum clnt_stat clnt_rdma_kcallit(CLIENT *, rpcproc_t, xdrproc_t,
118 caddr_t, xdrproc_t, caddr_t, struct timeval);
119 static void clnt_rdma_kabort(CLIENT *);
120 static void clnt_rdma_kerror(CLIENT *, struct rpc_err *);
121 static bool_t clnt_rdma_kfreeres(CLIENT *, xdrproc_t, caddr_t);
122 static void clnt_rdma_kdestroy(CLIENT *);
123 static bool_t clnt_rdma_kcontrol(CLIENT *, int, char *);
124 static int clnt_rdma_ksettimers(CLIENT *, struct rpc_timers *,
125 struct rpc_timers *, int, void(*)(int, int, caddr_t), caddr_t, uint32_t);
126
127 /*
128 * Operations vector for RDMA based RPC
129 */
130 static struct clnt_ops rdma_clnt_ops = {
131 clnt_rdma_kcallit, /* do rpc call */
132 clnt_rdma_kabort, /* abort call */
133 clnt_rdma_kerror, /* return error status */
134 clnt_rdma_kfreeres, /* free results */
135 clnt_rdma_kdestroy, /* destroy rpc handle */
136 clnt_rdma_kcontrol, /* the ioctl() of rpc */
137 clnt_rdma_ksettimers, /* set retry timers */
138 };
139
140 /*
141 * The size of the preserialized RPC header information.
142 */
143 #define CKU_HDRSIZE 20
144 #define CLNT_RDMA_SUCCESS 0
145 #define CLNT_RDMA_FAIL -99
146
147 /*
148 * Per RPC RDMA endpoint details
149 */
150 typedef struct cku_private {
151 CLIENT cku_client; /* client handle */
152 rdma_mod_t *cku_rd_mod; /* underlying RDMA mod */
153 void *cku_rd_handle; /* underlying RDMA device */
154 struct netbuf cku_addr; /* remote netbuf address */
155 int cku_addrfmly; /* for finding addr_type */
156 struct rpc_err cku_err; /* error status */
157 struct cred *cku_cred; /* credentials */
158 XDR cku_outxdr; /* xdr stream for output */
159 uint32_t cku_outsz;
160 XDR cku_inxdr; /* xdr stream for input */
161 char cku_rpchdr[CKU_HDRSIZE+4]; /* rpc header */
162 uint32_t cku_xid; /* current XID */
163 } cku_private_t;
164
165 #define CLNT_RDMA_DELAY 10 /* secs to delay after a connection failure */
326 rw_exit(&rdma_lock);
327
328 /*
329 * Set up the rpc information
330 */
331 p->cku_cred = cred;
332 p->cku_xid = 0;
333
334 if (p->cku_addr.maxlen < raddr->len) {
335 if (p->cku_addr.maxlen != 0 && p->cku_addr.buf != NULL)
336 kmem_free(p->cku_addr.buf, p->cku_addr.maxlen);
337 p->cku_addr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP);
338 p->cku_addr.maxlen = raddr->maxlen;
339 }
340
341 p->cku_addr.len = raddr->len;
342 bcopy(raddr->buf, p->cku_addr.buf, raddr->len);
343 h->cl_ops = &rdma_clnt_ops;
344 }
345
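/*
 * Serialize the RPC call into rpcmsg/xdrs: the xid and preserialized call
 * header, then the procedure number, credentials and arguments.  For
 * RPCSEC_GSS the header and procedure number are wrapped with AUTH_WRAP and
 * the arguments are encoded separately; if the XDR stream had to grow a new
 * buffer, rpcmsg is updated to point at it.
 */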
346 static int clnt_compose_rpcmsg(CLIENT *h, rpcproc_t procnum,
347 rdma_buf_t *rpcmsg, XDR *xdrs,
348 xdrproc_t xdr_args, caddr_t argsp)
349 {
350 cku_private_t *p = htop(h);
351
352 if (h->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
353 /*
354 * Copy in the preserialized RPC header
355 * information.
356 */
357 bcopy(p->cku_rpchdr, rpcmsg->addr, CKU_HDRSIZE);
358
359 /*
360 * transaction id is the 1st thing in the output
361 * buffer.
362 */
363 /* LINTED pointer alignment */
364 (*(uint32_t *)(rpcmsg->addr)) = p->cku_xid;
365
366 /* Skip the preserialized stuff. */
367 XDR_SETPOS(xdrs, CKU_HDRSIZE);
368
369 /* Serialize dynamic stuff into the output buffer. */
370 if ((!XDR_PUTINT32(xdrs, (int32_t *)&procnum)) ||
371 (!AUTH_MARSHALL(h->cl_auth, xdrs, p->cku_cred)) ||
372 (!(*xdr_args)(xdrs, argsp))) {
373 cmn_err(CE_WARN, "clnt_compose_rpcmsg: failed to serialize dynamic arguments");
374 return CLNT_RDMA_FAIL;
375 }
376 p->cku_outsz = XDR_GETPOS(xdrs);
377 } else {
378 uint32_t *uproc = (uint32_t *)&p->cku_rpchdr[CKU_HDRSIZE];
379 IXDR_PUT_U_INT32(uproc, procnum);
380 (*(uint32_t *)(&p->cku_rpchdr[0])) = p->cku_xid;
381 XDR_SETPOS(xdrs, 0);
382
383 /* Serialize the procedure number and the arguments. */
384 if (!AUTH_WRAP(h->cl_auth, (caddr_t)p->cku_rpchdr,
385 CKU_HDRSIZE+4, xdrs, NULL, NULL) ||
386 !(*xdr_args)(xdrs, argsp)) {
387 if (rpcmsg->addr != xdrs->x_base) {
388 rpcmsg->addr = xdrs->x_base;
389 rpcmsg->len = xdr_getbufsize(xdrs);
390 }
391 cmn_err(CE_WARN, "clnt_compose_rpcmsg: failed to serialize procedure number and arguments");
392 return CLNT_RDMA_FAIL;
393 }
394 /*
395 * If we had to allocate a new buffer while encoding
396 * then update the addr and len.
397 */
398 if (rpcmsg->addr != xdrs->x_base) {
399 rpcmsg->addr = xdrs->x_base;
400 rpcmsg->len = xdr_getbufsize(xdrs);
401 }
402
403 p->cku_outsz = XDR_GETPOS(xdrs);
404 }
405
406 return CLNT_RDMA_SUCCESS;
407 }
408
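/*
 * Allocate a SEND_BUFFER and encode the RPC/RDMA transport header into it:
 * the xid (treated as opaque data), the protocol version, the credit
 * request, and the message type (*op, RDMA_MSG or RDMA_NOMSG).  The chunk
 * lists are appended by the caller.
 */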
409 static int clnt_compose_rdma_header(CONN *conn, CLIENT *h, rdma_buf_t *clmsg,
410 XDR **xdrs, uint_t *op)
411 {
412 cku_private_t *p = htop(h);
413 uint_t vers;
414 uint32_t rdma_credit = rdma_bufs_rqst;
415
416 vers = RPCRDMA_VERS;
417 clmsg->type = SEND_BUFFER;
418
419 #ifdef DYNAMIC_CREDIT_CONTROL
420 clnt_compute_credit(conn, &rdma_credit);
421 #endif
422
423 if (RDMA_BUF_ALLOC(conn, clmsg)) {
424 return CLNT_RDMA_FAIL;
425 }
426
427 *xdrs = &p->cku_outxdr;
428 xdrmem_create(*xdrs, clmsg->addr, clmsg->len, XDR_ENCODE);
429
430 (*(uint32_t *)clmsg->addr) = p->cku_xid;
431 XDR_SETPOS(*xdrs, sizeof (uint32_t));
432 (void) xdr_u_int(*xdrs, &vers);
433 (void) xdr_u_int(*xdrs, &rdma_credit);
434 (void) xdr_u_int(*xdrs, op);
435
436 return CLNT_RDMA_SUCCESS;
437 }
438
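/*
 * Register the read chunk list (if any) with the RDMA layer and XDR-encode
 * it into the transport header so the server can RDMA_READ the call data.
 */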
439 static int clnt_setup_rlist(CONN *conn, XDR *xdrs, struct clist **cl)
440 {
441 int ret;
442
443 if (*cl != NULL) {
444 ret = clist_register(conn, *cl, 1);
445 if (ret != RDMA_SUCCESS) {
446 return CLNT_RDMA_FAIL;
447 }
448 }
449 (void) xdr_do_clist(xdrs, cl);
450
451 return CLNT_RDMA_SUCCESS;
452 }
453
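/*
 * For NFSv3 READ, build a write chunk list out of the caller's reply
 * buffers (one entry per uio iovec, or a single entry covering the
 * contiguous READ3vres data buffer), register it, and encode it into the
 * header so the server can RDMA_WRITE the file data directly into place.
 * All other procedures get an empty write list.
 */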
454 static int clnt_setup_wlist(CONN *conn, rpcproc_t procnum,
455 struct clist **rpccall_wlist, caddr_t resultsp,
456 xdrproc_t xdr_results, XDR *xdrs)
457 {
458 int status;
459 uint_t num_segment = 0;
460
461 if (procnum == NFSPROC3_READ) {
462 clnt_read3args_make_wlist(resultsp, rpccall_wlist,
463 xdr_results, &num_segment);
464 status = clist_register(conn, *rpccall_wlist, 0);
465 if (status != RDMA_SUCCESS)
466 return CLNT_RDMA_FAIL;
467 } else {
468 *rpccall_wlist = NULL;
469 }
470
471 if (! xdr_encode_wlist(xdrs, *rpccall_wlist, num_segment))
472 return CLNT_RDMA_FAIL;
473
474 return CLNT_RDMA_SUCCESS;
475 }
476
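/*
 * For procedures whose replies may not fit in an inline receive buffer
 * (READDIR, READDIRPLUS, READLINK), pre-register a LONG_REPLY_LEN reply
 * chunk -- drawn from the registered-buffer cache when SERVER_REG_CACHE is
 * defined -- so the server can RDMA_WRITE a long reply into it.
 */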
477 static int clnt_setup_long_reply(CONN *conn, rpcproc_t procnum,
478 struct clist *lrc_clist,
479 XDR *xdrs, bool_t *exists)
480 {
481 int status;
482 caddr_t addr;
483 #ifdef SERVER_REG_CACHE
484 rib_lrc_entry_t *long_reply_buf = NULL;
485 #endif
486 *exists = FALSE;
487 lrc_clist->c_daddr = NULL;
488
489 #ifdef RPC_RDMA_INLINE
490 if (lrc_clist->c_len < rdma_minchunk)
491 return CLNT_RDMA_SUCCESS;
492 #endif
493
494 if (procnum == NFSPROC3_READDIR ||
495 procnum == NFSPROC3_READDIRPLUS ||
496 procnum == NFSPROC3_READLINK) {
497 #ifndef SERVER_REG_CACHE
498 addr = kmem_alloc(LONG_REPLY_LEN, KM_SLEEP);
499 bzero(addr, LONG_REPLY_LEN);
500 lrc_clist->c_daddr = (uint64)addr;
501 lrc_clist->c_len = LONG_REPLY_LEN;
502 lrc_clist->c_next = NULL;
503 lrc_clist->long_reply_buf = NULL;
504 status = clist_register(conn, lrc_clist, 0);
505 #else
506 long_reply_buf = RDMA_GET_SERVER_CACHE_BUF(conn, LONG_REPLY_LEN);
507 bzero(long_reply_buf->lrc_buf, LONG_REPLY_LEN);
508 lrc_clist->c_daddr = (uint64)long_reply_buf->lrc_buf;
509 lrc_clist->c_len = LONG_REPLY_LEN;
510 lrc_clist->c_next = NULL;
511 lrc_clist->long_reply_buf = (uint64)long_reply_buf;
512 lrc_clist->c_dmemhandle = long_reply_buf->lrc_mhandle;
513 status = clist_register(conn, lrc_clist, 0);
514 #endif
515 if (status) {
516 cmn_err(CE_WARN, "clnt_setup_long_reply: cannot register buffer");
517 #ifndef SERVER_REG_CACHE
518 kmem_free((void*)addr, (size_t)LONG_REPLY_LEN);
519 #else
520 RDMA_FREE_SERVER_CACHE_BUF(conn, (rib_lrc_entry_t *)long_reply_buf);
521
522 #endif
523 lrc_clist->c_daddr = NULL;
524 return CLNT_RDMA_FAIL;
525 }
526 *exists = TRUE;
527 }
528
529 return CLNT_RDMA_SUCCESS;
530 }
531
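/*
 * Build the write chunk list for a READ3 call from the reply buffers the
 * caller passed in: one clist entry per iovec for x_READ3uiores results
 * (recording the address space for user-space iovecs), or a single entry
 * covering the data buffer for x_READ3vres results.
 */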
532 static void
533 clnt_read3args_make_wlist(caddr_t replyp, struct clist **rpccall_wlist,
534 xdrproc_t xr, uint_t *num_segment)
535 {
536 READ3uiores *ures = (READ3uiores *)replyp;
537 READ3vres *vres = (READ3vres *)replyp;
538 struct clist *rwl = NULL, *prev = NULL;
539 int i, total_length;
540
541 *rpccall_wlist = NULL;
542
543 #ifdef RPC_RDMA_INLINE
544 if (xr == x_READ3uiores) {
545 total_length = 0;
546 for (i = 0; i < ures->uiop->uio_iovcnt; i++) {
547 total_length += ures->uiop->uio_iov[i].iov_len;
548 }
549 } else {
550 total_length = vres->data.data_len;
551 }
552
553 if (total_length < rdma_minchunk)
554 return;
555 #endif
556
557 /* XXX: fake a chunk threshold for the combined length for now */
558 if (xr == x_READ3uiores) {
559 *num_segment = ures->uiop->uio_iovcnt;
560 for (i = 0; i < ures->uiop->uio_iovcnt; i++) {
561 rwl = (struct clist *)kmem_zalloc(sizeof(struct clist),
562 KM_SLEEP);
563
564 rwl->c_len = ures->uiop->uio_iov[i].iov_len;
565 rwl->c_daddr = (uint64)(ures->uiop->uio_iov[i].iov_base);
566 /*
567 * if userspace address, put adspace ptr in clist.
568 * If not, then do nothing since the kmem_zalloc()ed
569 * clist already has it set to NULL.
570 */
571 if (ures->uiop->uio_segflg == UIO_USERSPACE) {
573 rwl->c_adspc = ttoproc(curthread)->p_as;
574 } else {
575 rwl->c_dpplist = (page_t **)NULL;
576 }
577
578 if (prev == NULL)
579 prev = rwl;
580 else {
581 prev->c_next = rwl;
582 prev = rwl;
583 }
584
585 if (*rpccall_wlist == NULL)
586 *rpccall_wlist = rwl;
587 }
588 rwl->c_next = NULL;
589 } else if (xr == x_READ3vres) {
590 *num_segment = 1;
591 rwl = (struct clist *)kmem_zalloc(sizeof (struct clist),
592 KM_SLEEP);
593 *rwl = empty_cl;
594
595 rwl->c_len = vres->data.data_len;
596 rwl->c_daddr = (uint64)(vres->data.data_val);
597
598 if (*rpccall_wlist == NULL)
599 *rpccall_wlist = rwl;
600 } else {
601 /*cmn_err(CE_NOTE, "read3args_make_wlist: non READ3xr=%p",
602 (void *)xr);*/
603 }
604 }
605
606 /* ARGSUSED */
607 static enum clnt_stat
608 clnt_rdma_kcallit(CLIENT *h, rpcproc_t procnum, xdrproc_t xdr_args,
609 caddr_t argsp, xdrproc_t xdr_results, caddr_t resultsp, struct timeval wait)
610 {
611 cku_private_t *p = htop(h);
612 int status;
613 XDR *xdrs;
614 XDR *cxdrp = NULL, callxdr; /* for xdrrdma encoding the RPC call */
615 XDR *rxdrp = NULL, replxdr; /* for xdrrdma decoding the RPC reply */
616 struct rpc_msg reply_msg;
617 struct clist *sendlist = NULL, *recvlist = NULL;
618 struct clist *cl = NULL, *cle = NULL, *rdma_reply = NULL;
619 uint_t vers, op;
620 uint_t off;
621 uint32_t xid;
622 uint32_t seg_array_len;
623 CONN *conn = NULL;
624 rdma_buf_t clmsg = {0}, rpcmsg = {0};
625 int msglen;
626 clock_t ticks;
627 bool_t wlist_exists_reply = FALSE;
628 bool_t long_reply_buf_exists = FALSE;
629
630 struct clist *rpccall_wlist = NULL, *rpcreply_wlist = NULL,
631 long_reply_clist = {0};
632 rpccall_read_t read_type;
633 rpccall_write_t write_type;
634 uint32_t rdma_credit = rdma_bufs_rqst;
635 struct clist long_reply_buf_clist = {0};
636
637 RCSTAT_INCR(rccalls);
638 /*
639 * Get unique xid
640 */
641 if (p->cku_xid == 0)
642 p->cku_xid = alloc_xid();
643
644 status = RDMA_GET_CONN(p->cku_rd_mod->rdma_ops, &p->cku_addr,
645 p->cku_addrfmly, p->cku_rd_handle, &conn);
646
647 if (conn == NULL) {
648 /*
649 * Connection to the server failed. Could be because of one
650 * of several things. In some cases we don't want
651 * the caller to retry immediately - delay before
652 * returning to caller.
653 */
654 switch (status) {
655 case RDMA_TIMEDOUT:
656 /*
674 * down or temporary resource failure. Delay before
675 * returning to caller.
676 */
677 ticks = clnt_rdma_min_delay * drv_usectohz(1000000);
678 p->cku_err.re_status = RPC_CANTCONNECT;
679 p->cku_err.re_errno = EIO;
680
681 if (h->cl_nosignal == TRUE) {
682 delay(ticks);
683 } else {
684 if (delay_sig(ticks) == EINTR) {
685 p->cku_err.re_status = RPC_INTR;
686 p->cku_err.re_errno = EINTR;
687 }
688 }
689 break;
690 }
691
692 return (p->cku_err.re_status);
693 }
694
695 clnt_check_credit(conn);
696
697 /*
698 * Get the size of the rpc call message. Need this
699 * to determine if the rpc call message will fit in
700 * the pre-allocated RDMA buffers. If the rpc call
701 * message length is greater than the pre-allocated
702 * buffers, then it is a Long RPC. A one-time-use
703 * buffer is allocated and registered for the Long
704 * RPC call.
705 */
706 xdrs = &callxdr;
707 msglen = CKU_HDRSIZE + BYTES_PER_XDR_UNIT;
708
709 if (h->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
710 msglen += xdrrdma_authsize(h->cl_auth, p->cku_cred,
711 rdma_minchunk);
712 msglen += xdrrdma_sizeof(xdr_args, argsp, rdma_minchunk);
713
714 if (msglen > RPC_MSG_SZ)
715 read_type = RPCCALL_RCHUNK;
716 else
717 read_type = RPCCALL_NORCHUNK;
718 } else {
719 /*
720 * For RPCSEC_GSS since we cannot accurately presize the
721 * buffer required for encoding, we assume that it is going
722 * to be a Long RPC to start with. We also create the
723 * XDR stream with min_chunk set to 0, which instructs
724 * the XDR layer to not chunk the incoming byte stream.
725 */
726
727 msglen += 2 * MAX_AUTH_BYTES + 2 * sizeof (struct opaque_auth);
728 msglen += xdrrdma_sizeof(xdr_args, argsp, rdma_minchunk);
729
730 if (msglen > RPC_MSG_SZ)
731 read_type = RPCCALL_RCHUNK;
732 else
733 read_type = RPCCALL_NORCHUNK;
734 }
735
736 if (read_type == RPCCALL_NORCHUNK) {
737
738 rpcmsg.type = SEND_BUFFER;
739 if (RDMA_BUF_ALLOC(conn, &rpcmsg)) {
740 cmn_err(CE_WARN, "clnt_rdma_kcallit: no buffers!");
741 goto done;
742 }
743 } else {
744 #ifdef SERVER_REG_CACHE
745 rib_lrc_entry_t *long_reply_buf = NULL;
746 #endif
747 rpcmsg.type = CHUNK_BUFFER;
748 #ifdef SERVER_REG_CACHE
749 long_reply_buf = RDMA_GET_SERVER_CACHE_BUF(conn, msglen);
750 rpcmsg.addr = long_reply_buf->lrc_buf;
751 #else
752 rpcmsg.addr = kmem_zalloc(msglen, KM_SLEEP);
753 #endif
754 cle = (struct clist *)kmem_zalloc(sizeof (struct clist),
755 KM_SLEEP);
756 cle->c_xdroff = 0;
757 cle->c_len = rpcmsg.len = msglen;
758 cle->c_saddr = (uint64)(uintptr_t)rpcmsg.addr;
759 cle->c_next = NULL;
760 #ifdef SERVER_REG_CACHE
761 cle->long_reply_buf = (uint64)long_reply_buf;
762 #endif
763 }
764
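/*
 * RDMA_MSG sends the RPC call inline in the SEND_BUFFER; RDMA_NOMSG sends
 * only the transport header inline and supplies the call body as a read
 * chunk (cle) that the server pulls over with RDMA_READ.
 */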
765 op = cle ? RDMA_NOMSG : RDMA_MSG;
766 cxdrp = xdrs;
767 xdrrdma_create(xdrs, rpcmsg.addr, (cle ? msglen : rpcmsg.len),
768 rdma_minchunk, cle, XDR_ENCODE, NULL);
769
770 status = clnt_compose_rpcmsg(h, procnum, &rpcmsg, xdrs, xdr_args, argsp);
771 if (status != CLNT_RDMA_SUCCESS) {
772 rdma_buf_free(conn, &rpcmsg);
773 clist_free(cle);
774 p->cku_err.re_status = RPC_CANTENCODEARGS;
775 p->cku_err.re_errno = EIO;
776 cmn_err(CE_WARN,
777 "clnt_rdma_kcallit: clnt_compose_rpcmsg failed");
778 goto done;
779 }
780
781 /* Read chunklist (a linked list of N elements,
782 * position P (same P for all chunks of same arg!):
783 * 1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0
784 */
785
786 cl = xdrrdma_clist(xdrs);
787
788 /*
789 * Update the chunk size information for the Long RPC msg.
790 */
791 if (cl && op == RDMA_NOMSG)
792 cl->c_len = p->cku_outsz;
793
794 /*
795 * Prepare the header for the RDMA chunk
796 */
797 status = clnt_compose_rdma_header(conn, h, &clmsg, &xdrs, &op);
798 if (status != CLNT_RDMA_SUCCESS) {
799 p->cku_err.re_status = RPC_CANTSEND;
800 p->cku_err.re_errno = EIO;
801 rdma_buf_free(conn, &rpcmsg);
802 clist_free(cle);
803 RCSTAT_INCR(rcnomem);
804 cmn_err(CE_WARN, "clnt_rdma_kcallit: no free buffers!!");
805 goto done;
806 }
807
808 status = clnt_setup_rlist(conn, xdrs, &cl);
809 if (status != CLNT_RDMA_SUCCESS) {
810 cmn_err(CE_WARN, "clnt_rdma_kcallit: clist register failed");
811 rdma_buf_free(conn, &clmsg);
812 rdma_buf_free(conn, &rpcmsg);
813 clist_free(cl);
814 p->cku_err.re_status = RPC_CANTSEND;
815 p->cku_err.re_errno = EIO;
816 goto done;
817 }
818
819 /* Setup write chunk list for NFS3 READ operation
820 * Other operations will have a NULL wlist
821 */
822 status = clnt_setup_wlist(conn, procnum, &rpccall_wlist,
823 resultsp, xdr_results, xdrs);
824 if (status != CLNT_RDMA_SUCCESS) {
825 rdma_buf_free(conn, &clmsg);
826 rdma_buf_free(conn, &rpcmsg);
827 clist_free(cl);
828 p->cku_err.re_status = RPC_CANTSEND;
829 p->cku_err.re_errno = EIO;
830 goto done;
831 }
832
833 status = clnt_setup_long_reply(conn, procnum, &long_reply_buf_clist,
834 xdrs, &long_reply_buf_exists);
835 if (status != CLNT_RDMA_SUCCESS) {
836 rdma_buf_free(conn, &clmsg);
837 rdma_buf_free(conn, &rpcmsg);
838 clist_free(cl);
839 p->cku_err.re_status = RPC_CANTSEND;
840 p->cku_err.re_errno = EIO;
841 goto done;
842 }
843
844 /*
845 * XDR encode the RDMA_REPLY write chunk
846 */
847 seg_array_len = (long_reply_buf_exists ? 1 : 0);
848 (void) xdr_encode_reply_wchunk(xdrs, &long_reply_buf_clist, seg_array_len);
849 /*
850 * Start with the RDMA header and clist (if any)
851 */
852 sendlist = NULL;
853 clist_add(&sendlist, 0, XDR_GETPOS(xdrs), &clmsg.handle,
854 clmsg.addr, NULL, NULL);
855 /*
856 * Put the RPC call message in the send list if small RPC
857 */
858 if (op == RDMA_MSG) {
859 clist_add(&sendlist, 0, p->cku_outsz, &rpcmsg.handle,
860 rpcmsg.addr, NULL, NULL);
861 } else {
862 /* Long RPC already in chunk list */
863 RCSTAT_INCR(rclongrpcs);
864 }
865
866 /*
867 * Set up a reply buffer ready for the reply
868 */
869 status = rdma_clnt_postrecv(conn, p->cku_xid);
870 if (status != RDMA_SUCCESS) {
871 rdma_buf_free(conn, &clmsg);
872 rdma_buf_free(conn, &rpcmsg);
873 if (cl) {
874 (void) clist_deregister(conn, cl, 1);
882 /*
883 * sync the memory for dma
884 */
885 if (cl != NULL) {
886 status = clist_syncmem(conn, cl, 1);
887 if (status != RDMA_SUCCESS) {
888 rdma_buf_free(conn, &clmsg);
889 rdma_buf_free(conn, &rpcmsg);
890 (void) clist_deregister(conn, cl, 1);
891 clist_free(cl);
892 clist_free(sendlist);
893 p->cku_err.re_status = RPC_CANTSEND;
894 p->cku_err.re_errno = EIO;
895 goto done;
896 }
897 }
898
899 /*
900 * Send the call message to the server
901 */
902 #if defined (CLNT_INTERRUPT_COAL)
903 status = RDMA_SEND_BL(conn, sendlist, p->cku_xid);
904 #else
905 status = RDMA_SEND(conn, sendlist, p->cku_xid);
906 #endif
907 if (status != RDMA_SUCCESS) {
908 if (cl) {
909 (void) clist_deregister(conn, cl, 1);
910 clist_free(cl);
911 /*
912 * If this was a long RPC message, need
913 * to free that buffer.
914 */
915 if (rpcmsg.type == CHUNK_BUFFER)
916 rdma_buf_free(conn, &rpcmsg);
917 }
918 clist_free(sendlist);
919 p->cku_err.re_status = RPC_CANTSEND;
920 p->cku_err.re_errno = EIO;
921 goto done;
922 } else {
923 /*
924 * RDMA plugin now owns the send msg buffers.
925 * Clear them out and don't free them here.
926 */
927 clmsg.addr = NULL;
928 if (rpcmsg.type == SEND_BUFFER)
929 rpcmsg.addr = NULL;
930 }
931 clist_free(sendlist);
932
933 /*
934 * Recv rpc reply
935 */
936 status = RDMA_RECV(conn, &recvlist, p->cku_xid);
937 clnt_return_credit(conn);
938
939 /*
940 * Deregister chunks sent. Do this only after the reply
941 * is received as that is a sure indication that the
942 * remote end has completed RDMA of the chunks.
943 */
944 if (cl != NULL) {
945 /*
946 * Deregister the chunks
947 */
948 (void) clist_deregister(conn, cl, 1);
949 clist_free(cl);
950 /*
951 * If long RPC free chunk
952 */
953 rdma_buf_free(conn, &rpcmsg);
954 }
955
956 /*
957 * Now check recv status
958 */
959 if (status != 0) {
960 if (status == RDMA_INTR) {
961 p->cku_err.re_status = RPC_INTR;
962 p->cku_err.re_errno = EINTR;
963 RCSTAT_INCR(rcintrs);
964 } else if (status == RPC_TIMEDOUT) {
965 p->cku_err.re_status = RPC_TIMEDOUT;
966 p->cku_err.re_errno = ETIMEDOUT;
967 RCSTAT_INCR(rctimeouts);
968 } else {
969 p->cku_err.re_status = RPC_CANTRECV;
970 p->cku_err.re_errno = EIO;
971 }
972 goto done;
973 }
974 /*
975 * Process the reply message.
976 *
977 * First the chunk list (if any)
978 */
979 xdrs = &(p->cku_inxdr);
980 xdrmem_create(xdrs, (caddr_t)(uintptr_t)recvlist->c_saddr,
981 recvlist->c_len, XDR_DECODE);
982 /*
983 * Treat xid as opaque (xid is the first entity
984 * in the rpc rdma message).
985 */
986 xid = *(uint32_t *)(uintptr_t)recvlist->c_saddr;
987 /* Skip xid and set the xdr position accordingly. */
988 XDR_SETPOS(xdrs, sizeof (uint32_t));
989 (void) xdr_u_int(xdrs, &vers);
990 (void) xdr_u_int(xdrs, &rdma_credit);
991 (void) xdr_u_int(xdrs, &op);
992 (void) xdr_do_clist(xdrs, &cl);
993 clnt_update_credit(conn, rdma_credit);
994 wlist_exists_reply = FALSE;
995 if (! xdr_decode_wlist(xdrs, &rpcreply_wlist, &wlist_exists_reply)) {
996 cmn_err(CE_NOTE,
997 "clnt_rdma_kcallit: xdr_decode_wlist failed");
998 /* XXX: what should we fail with here -- EIO? */
999 }
1000 #ifdef RPC_RDMA_INLINE
1001 if (xdr_results == x_READ3vres) {
1002 ((READ3vres *)resultsp)->wlist = NULL;
1003 } else if (xdr_results == x_READ3uiores) {
1004 ((READ3uiores *)resultsp)->wlist = NULL;
1005 }
1006 #endif
1007
1008 if (procnum == NFSPROC3_READ) {
1009
1010 check_dereg_wlist(conn, rpccall_wlist);
1011
1012 if (wlist_exists_reply) {
1013 if (xdr_results == x_READ3vres) {
1014 ((READ3vres *)resultsp)->wlist =
1015 rpcreply_wlist;
1016 ((READ3vres *)resultsp)->wlist_len =
1017 rpcreply_wlist->c_len;
1018 } else if (xdr_results == x_READ3uiores) {
1019 ((READ3uiores *)resultsp)->wlist =
1020 rpcreply_wlist;
1021 ((READ3uiores *)resultsp)->wlist_len =
1022 rpcreply_wlist->c_len;
1023 } else {
1024 cmn_err(CE_NOTE,
1025 "unknown READ3 xdr decode fnp=%p",
1026 (void *)xdr_results);
1027 }
1028 }
1029 } else {
1030 if (wlist_exists_reply)
1031 cmn_err(CE_NOTE,
1032 "clnt_rdma_kcallit: received wlist for "
1033 "non-READ3 call. reply xdr decode fnp=%p",
1034 (void *)xdr_results);
1035 }
1036
1037 /*
1038 * The server shouldn't have sent a RDMA_SEND that
1039 * the client needs to RDMA_WRITE a reply back to
1040 * the server. So silently ignoring what the
1041 * server returns in the rdma_reply section of the
1042 * header.
1043 */
1044 (void) xdr_decode_reply_wchunk(xdrs, &rdma_reply, conn);
1045 off = xdr_getpos(xdrs);
1046
1047 xdrs = &replxdr;
1048 if (clnt_decode_long_reply(conn, procnum, &long_reply_buf_clist,
1049 rdma_reply, xdrs, &rxdrp,
1050 cl, recvlist, op, off) != CLNT_RDMA_SUCCESS)
1051 {
1052 goto done;
1053 }
1054 reply_msg.rm_direction = REPLY;
1055 reply_msg.rm_reply.rp_stat = MSG_ACCEPTED;
1056 reply_msg.acpted_rply.ar_stat = SUCCESS;
1057 reply_msg.acpted_rply.ar_verf = _null_auth;
1058 /*
1059 * xdr_results will be done in AUTH_UNWRAP.
1060 */
1061 reply_msg.acpted_rply.ar_results.where = NULL;
1062 reply_msg.acpted_rply.ar_results.proc = xdr_void;
1063
1064 /*
1065 * Decode and validate the response.
1066 */
1067 if (xdr_replymsg(xdrs, &reply_msg)) {
1068 enum clnt_stat re_status;
1069
1070 _seterr_reply(&reply_msg, &(p->cku_err));
1071
1072 re_status = p->cku_err.re_status;
1073 if (re_status == RPC_SUCCESS) {
1118 break;
1119 }
1120 RPCLOG(1, "clnt_rdma_kcallit : "
1121 "authentication failed with "
1122 "RPC_AUTHERROR of type %d\n",
1123 p->cku_err.re_why);
1124 }
1125 cmn_err(CE_WARN,
1126 "clnt_rdma_kcallit: RPC failed");
1127
1128 }
1129 } else {
1130 p->cku_err.re_status = RPC_CANTDECODERES;
1131 p->cku_err.re_errno = EIO;
1132 cmn_err(CE_WARN, "clnt_rdma_kcallit: xdr_replymsg failed");
1133 }
1134
1135 /*
1136 * If rpc reply is in a chunk, free it now.
1137 */
1138 done:
1139 if (long_reply_buf_exists) {
1140 (void) clist_deregister(conn, &long_reply_buf_clist, 0);
1141 #ifndef SERVER_REG_CACHE
1142 kmem_free((void *)long_reply_buf_clist.c_daddr,
1143 (size_t)long_reply_buf_clist.c_len);
1144 #else
1145 RDMA_FREE_SERVER_CACHE_BUF(conn, (rib_lrc_entry_t *)long_reply_buf_clist.long_reply_buf);
1146 #endif
1147 }
1148 if (cxdrp)
1149 XDR_DESTROY(cxdrp);
1150 if (rxdrp) {
1151 (void) xdr_rpc_free_verifier(rxdrp, &reply_msg);
1152 XDR_DESTROY(rxdrp);
1153 }
1154
1155 if (recvlist) {
1156 rdma_buf_t recvmsg = {0};
1157 recvmsg.addr = (caddr_t)(uintptr_t)recvlist->c_saddr;
1158 recvmsg.type = RECV_BUFFER;
1159 RDMA_BUF_FREE(conn, &recvmsg);
1160 clist_free(recvlist);
1161 }
1162 #if (!defined(ASYNC_CLIENT_DEREG))
1163 if (rpccall_wlist) {
1164 kmem_free(rpccall_wlist, sizeof (clist));
1165 }
1166 #endif
1167
1168 RDMA_REL_CONN(conn);
1169 if (p->cku_err.re_status != RPC_SUCCESS) {
1170 RCSTAT_INCR(rcbadcalls);
1171 }
1172 return (p->cku_err.re_status);
1173 }
1174
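/*
 * Pick the XDR source for decoding the reply: the pre-registered long-reply
 * chunk when the server replied RDMA_NOMSG and a reply chunk was set up
 * (READDIR/READDIRPLUS/READLINK), otherwise the inline receive buffer just
 * past the transport header at offset 'off'.
 */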
1175 static int clnt_decode_long_reply(CONN *conn, rpcproc_t procnum,
1176 struct clist *long_reply_buf_clist,
1177 struct clist *rdma_reply, XDR *xdrs,
1178 XDR **rxdrp, struct clist *cl,
1179 struct clist *recvlist,
1180 uint_t op,uint_t off)
1181 {
1182 if (RDMA_NOMSG == op && long_reply_buf_clist->c_daddr) {
1183 if (procnum == NFSPROC3_READDIR ||
1184 procnum == NFSPROC3_READDIRPLUS ||
1185 procnum == NFSPROC3_READLINK) {
1186 xdrmem_destroy(xdrs);
1187 xdrrdma_create(xdrs,
1188 (caddr_t)long_reply_buf_clist->c_daddr,
1189 rdma_reply->c_len,
1190 0,
1191 NULL,
1192 XDR_DECODE,
1193 conn);
1194
1195 *rxdrp = xdrs;
1196 } else {
1197 cmn_err(CE_NOTE, "clnt_rdma_kcallit: "
1198 "wchunk buffer for wrong nfs proc");
1199 xdrmem_destroy(xdrs);
1200 *rxdrp = NULL;
1201 }
1202 } else if (cl && RDMA_NOMSG == op) {
1203 cmn_err(CE_NOTE, "clnt_rdma_kcallit: "
1204 "Server sent a READ list in the RPC Reply");
1205 xdrmem_destroy(xdrs);
1206 } else {
1207 xdrmem_destroy(xdrs);
1208 xdrrdma_create(xdrs,
1209 (caddr_t)(uintptr_t)(recvlist->c_saddr + off),
1210 recvlist->c_len - off, 0, cl, XDR_DECODE, conn);
1211 *rxdrp = xdrs;
1212 }
1213 return CLNT_RDMA_SUCCESS;
1214 }
1215
1216 #ifdef DYNAMIC_CREDIT_CONTROL
1217 static void clnt_compute_credit(CONN *conn, uint32_t *rdma_credit)
1218 {
1219 rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc;
1220
1221 mutex_enter(&conn->c_lock);
1222 if (cc_info->clnt_cc_granted_ops - cc_info->clnt_cc_in_flight_ops < CLNT_CREDIT_LOW)
1223 *rdma_credit = rdma_bufs_rqst + cc_info->clnt_cc_in_flight_ops / 2;
1224 mutex_exit(&conn->c_lock);
1225 }
1226 #endif
1227
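/*
 * Client-side credit flow, roughly: clnt_check_credit() blocks the caller
 * until the number of in-flight operations drops below the server's granted
 * count and then claims a slot; clnt_update_credit() records the grant
 * advertised in each RPC/RDMA reply header; clnt_return_credit() releases
 * the slot and wakes one waiter as soon as RDMA_RECV() returns.
 */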
1228 static void clnt_return_credit(CONN *conn)
1229 {
1230 rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc;
1231
1232 mutex_enter(&conn->c_lock);
1233 cc_info->clnt_cc_in_flight_ops--;
1234 cv_signal(&cc_info->clnt_cc_cv);
1235 mutex_exit(&conn->c_lock);
1236 }
1237
1238 static void clnt_update_credit(CONN *conn, uint32_t rdma_credit)
1239 {
1240 rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc;
1241
1242 /*
1243 * Get the granted number of buffers for credit control.
1244 */
1245 mutex_enter(&conn->c_lock);
1246 cc_info->clnt_cc_granted_ops = rdma_credit;
1247 mutex_exit(&conn->c_lock);
1248 }
1249
1250 static void clnt_check_credit(CONN *conn)
1251 {
1252 rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc;
1253
1254 /*
1255 * Make sure we are not going over our allowed buffer use
1256 * (and make sure we have gotten a granted value before).
1257 */
1258 mutex_enter(&conn->c_lock);
1259 while (cc_info->clnt_cc_in_flight_ops >= cc_info->clnt_cc_granted_ops
1260 && cc_info->clnt_cc_granted_ops != 0) {
1261 /*
1262 * Client has maxed out its granted buffers due to
1263 * credit control. Current handling is to block and wait.
1264 */
1265 cv_wait(&cc_info->clnt_cc_cv, &conn->c_lock);
1266 }
1267 cc_info->clnt_cc_in_flight_ops++;
1268 mutex_exit(&conn->c_lock);
1269 }
1270
1271 /* ARGSUSED */
1272 static void
1273 clnt_rdma_kabort(CLIENT *h)
1274 {
1275 }
1276
1277 static void
1278 clnt_rdma_kerror(CLIENT *h, struct rpc_err *err)
1279 {
1280 struct cku_private *p = htop(h);
1281
1282 *err = p->cku_err;
1283 }
1284
1285 static bool_t
1286 clnt_rdma_kfreeres(CLIENT *h, xdrproc_t xdr_res, caddr_t res_ptr)
1287 {
1288 struct cku_private *p = htop(h);
1289 XDR *xdrs;
1290
1303 /* ARGSUSED */
1304 static int
1305 clnt_rdma_ksettimers(CLIENT *h, struct rpc_timers *t, struct rpc_timers *all,
1306 int minimum, void(*feedback)(int, int, caddr_t), caddr_t arg,
1307 uint32_t xid)
1308 {
1309 RCSTAT_INCR(rctimers);
1310 return (0);
1311 }
1312
1313 int
1314 rdma_reachable(int addr_type, struct netbuf *addr, struct knetconfig **knconf)
1315 {
1316 rdma_registry_t *rp;
1317 void *handle = NULL;
1318 struct knetconfig *knc;
1319 char *pf, *p;
1320 rdma_stat status;
1321 int error = 0;
1322
1323 mutex_enter(&rdma_modload_lock);
1324 error = rdma_modload();
1325 mutex_exit(&rdma_modload_lock);
1326
1327 if (!INGLOBALZONE(curproc))
1328 return (-1);
1329 /*
1330 * modload the RDMA plugins if not already done.
1331 */
1332 if (!rdma_modloaded) {
1333 mutex_enter(&rdma_modload_lock);
1334 if (!rdma_modloaded) {
1335 error = rdma_modload();
1336 }
1337 mutex_exit(&rdma_modload_lock);
1338 if (error)
1339 return (-1);
1340 }
1341
1342 if (!rdma_dev_available)
1343 return (-1);
1344
1345 rw_enter(&rdma_lock, RW_READER);
1346 rp = rdma_mod_head;
1356 if (addr_type == AF_INET)
1357 (void) strncpy(pf, NC_INET, KNC_STRSIZE);
1358 else if (addr_type == AF_INET6)
1359 (void) strncpy(pf, NC_INET6, KNC_STRSIZE);
1360 pf[KNC_STRSIZE - 1] = '\0';
1361
1362 (void) strncpy(p, rp->r_mod->rdma_api, KNC_STRSIZE);
1363 p[KNC_STRSIZE - 1] = '\0';
1364
1365 knc->knc_protofmly = pf;
1366 knc->knc_proto = p;
1367 knc->knc_rdev = (dev_t)handle;
1368 *knconf = knc;
1369 rw_exit(&rdma_lock);
1370 return (0);
1371 }
1372 rp = rp->r_next;
1373 }
1374 rw_exit(&rdma_lock);
1375 return (-1);
1376 }
1377
1378 static void
1379 check_dereg_wlist(CONN *conn, clist *rwc)
1380 {
1381 if (rwc == NULL)
1382 return;
1383
1384 if (rwc) {
1385 if (rwc->c_dmemhandle.mrc_rmr && rwc->c_len) {
1386 int status;
1387 #if defined(ASYNC_CLIENT_DEREG)
1388 /* Add in an entry to rqueue */
1389 INSERT_QUEUE(conn, rwc);
1390 #else
1391 status = clist_deregister(conn, rwc, FALSE);
1392 if (status != RDMA_SUCCESS) {
1393 cmn_err(CE_NOTE, "dereg_wlist failed. "
1394 "status=%d", status);
1395 }
1396 #endif
1397 }
1398
1399 }
1400 }
|