Sdiff clnt_rdma.c


14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 /* 29 * Portions of this source code were derived from Berkeley 30 * 4.3 BSD under license from the Regents of the University of 31 * California. 32 */ 33 34 #pragma ident "@(#)clnt_rdma.c 1.10 05/07/26 SMI" 35 36 #include <sys/param.h> 37 #include <sys/types.h> 38 #include <sys/user.h> 39 #include <sys/systm.h> 40 #include <sys/sysmacros.h> 41 #include <sys/errno.h> 42 #include <sys/kmem.h> 43 #include <sys/debug.h> 44 #include <sys/systm.h> 45 #include <sys/kstat.h> 46 #include <sys/t_lock.h> 47 #include <sys/ddi.h> 48 #include <sys/cmn_err.h> 49 #include <sys/time.h> 50 #include <sys/isa_defs.h> 51 #include <sys/zone.h> 52 53 #include <rpc/types.h> 54 #include <rpc/xdr.h> 55 #include <rpc/auth.h> 56 #include <rpc/clnt.h> 57 #include <rpc/rpc_msg.h> 58 #include <rpc/rpc_rdma.h> 59 60 61 static enum clnt_stat clnt_rdma_kcallit(CLIENT *, rpcproc_t, xdrproc_t, 62 caddr_t, xdrproc_t, caddr_t, struct timeval); 63 static void clnt_rdma_kabort(CLIENT *); 64 static void clnt_rdma_kerror(CLIENT *, struct rpc_err *); 65 static bool_t clnt_rdma_kfreeres(CLIENT *, xdrproc_t, caddr_t); 66 static void clnt_rdma_kdestroy(CLIENT *); 67 static bool_t clnt_rdma_kcontrol(CLIENT *, int, char *); 68 static int clnt_rdma_ksettimers(CLIENT *, struct rpc_timers *, 69 struct rpc_timers *, int, void(*)(int, int, caddr_t), caddr_t, uint32_t); 70 71 /* 72 * Operations vector for RDMA based RPC 73 */ 
static struct clnt_ops rdma_clnt_ops = {
	clnt_rdma_kcallit,	/* do rpc call */
	clnt_rdma_kabort,	/* abort call */
	clnt_rdma_kerror,	/* return error status */
	clnt_rdma_kfreeres,	/* free results */
	clnt_rdma_kdestroy,	/* destroy rpc handle */
	clnt_rdma_kcontrol,	/* the ioctl() of rpc */
	clnt_rdma_ksettimers,	/* set retry timers */
};

/*
 * The size of the preserialized RPC header information.
 * The preserialized header is copied into the send buffer at offset 0
 * and the xid is then patched in as the first 32-bit word.
 */
#define	CKU_HDRSIZE	20

/*
 * Per RPC RDMA endpoint details.
 * One of these is embedded in (and recovered from) each CLIENT handle
 * via htop(); cku_client must therefore remain the first member.
 */
typedef struct cku_private {
	CLIENT			cku_client;	/* client handle */
	rdma_mod_t		*cku_rd_mod;	/* underlying RDMA mod */
	void			*cku_rd_handle;	/* underlying RDMA device */
	struct netbuf		cku_addr;	/* remote netbuf address */
	int			cku_addrfmly;	/* for finding addr_type */
	struct rpc_err		cku_err;	/* error status */
	struct cred		*cku_cred;	/* credentials */
	XDR			cku_outxdr;	/* xdr stream for output */
	uint32_t		cku_outsz;	/* size of encoded call msg */
	XDR			cku_inxdr;	/* xdr stream for input */
	char			cku_rpchdr[CKU_HDRSIZE+4]; /* rpc header */
	uint32_t		cku_xid;	/* current XID */
} cku_private_t;

#define	CLNT_RDMA_DELAY	10	/* secs to delay after a connection failure */
268 rw_exit(&rdma_lock); 269 270 /* 271 * Set up the rpc information 272 */ 273 p->cku_cred = cred; 274 p->cku_xid = 0; 275 276 if (p->cku_addr.maxlen < raddr->len) { 277 if (p->cku_addr.maxlen != 0 && p->cku_addr.buf != NULL) 278 kmem_free(p->cku_addr.buf, p->cku_addr.maxlen); 279 p->cku_addr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP); 280 p->cku_addr.maxlen = raddr->maxlen; 281 } 282 283 p->cku_addr.len = raddr->len; 284 bcopy(raddr->buf, p->cku_addr.buf, raddr->len); 285 h->cl_ops = &rdma_clnt_ops; 286 } 287 288 /* ARGSUSED */ 289 static enum clnt_stat 290 clnt_rdma_kcallit(CLIENT *h, rpcproc_t procnum, xdrproc_t xdr_args, 291 caddr_t argsp, xdrproc_t xdr_results, caddr_t resultsp, struct timeval wait) 292 { 293 cku_private_t *p = htop(h); 294 int status; 295 XDR *xdrs; 296 XDR *cxdrp = NULL, callxdr; /* for xdrrdma encoding the RPC call */ 297 XDR *rxdrp = NULL, replxdr; /* for xdrrdma decoding the RPC reply */ 298 struct rpc_msg reply_msg; 299 struct clist *sendlist, *recvlist = NULL; 300 struct clist *cl = NULL, *cle = NULL; 301 uint_t vers, op; 302 uint_t off; 303 uint32_t xid; 304 CONN *conn = NULL; 305 rdma_buf_t clmsg, rpcmsg, longmsg, rpcreply; 306 int msglen; 307 clock_t ticks; 308 309 RCSTAT_INCR(rccalls); 310 /* 311 * Get unique xid 312 */ 313 if (p->cku_xid == 0) 314 p->cku_xid = alloc_xid(); 315 316 status = RDMA_GET_CONN(p->cku_rd_mod->rdma_ops, &p->cku_addr, 317 p->cku_addrfmly, p->cku_rd_handle, &conn); 318 319 if (conn == NULL) { 320 /* 321 * Connect failed to server. Could be because of one 322 * of several things. In some cases we don't want 323 * the caller to retry immediately - delay before 324 * returning to caller. 325 */ 326 switch (status) { 327 case RDMA_TIMEDOUT: 328 /*
346 * down or temporary resource failure. Delay before 347 * returning to caller. 348 */ 349 ticks = clnt_rdma_min_delay * drv_usectohz(1000000); 350 p->cku_err.re_status = RPC_CANTCONNECT; 351 p->cku_err.re_errno = EIO; 352 353 if (h->cl_nosignal == TRUE) { 354 delay(ticks); 355 } else { 356 if (delay_sig(ticks) == EINTR) { 357 p->cku_err.re_status = RPC_INTR; 358 p->cku_err.re_errno = EINTR; 359 } 360 } 361 break; 362 } 363 364 return (p->cku_err.re_status); 365 } 366 /* 367 * Get the size of the rpc call message. Need this 368 * to determine if the rpc call message will fit in 369 * the pre-allocated RDMA buffers. If the rpc call 370 * message length is greater that the pre-allocated 371 * buffers then, it is a Long RPC. A one time use 372 * buffer is allocated and registered for the Long 373 * RPC call. 374 */ 375 xdrs = &callxdr; 376 msglen = CKU_HDRSIZE + BYTES_PER_XDR_UNIT; 377 if (h->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) { 378 msglen += xdrrdma_authsize(h->cl_auth, p->cku_cred, 379 rdma_minchunk); 380 msglen += xdrrdma_sizeof(xdr_args, argsp, rdma_minchunk); 381 382 if (msglen > RPC_MSG_SZ) { 383 384 /* 385 * Long RPC. Allocate one time use custom buffer. 
386 */ 387 rpcmsg.type = CHUNK_BUFFER; 388 rpcmsg.addr = kmem_zalloc(msglen, KM_SLEEP); 389 cle = kmem_zalloc(sizeof (*cle), KM_SLEEP); 390 cle->c_xdroff = 0; 391 cle->c_len = rpcmsg.len = msglen; 392 cle->c_saddr = (uint64)(uintptr_t)rpcmsg.addr; 393 cle->c_next = NULL; 394 xdrrdma_create(xdrs, rpcmsg.addr, msglen, 395 rdma_minchunk, cle, XDR_ENCODE, NULL); 396 cxdrp = xdrs; 397 op = RDMA_NOMSG; 398 } else { 399 /* 400 * Get a pre-allocated buffer for rpc call 401 */ 402 rpcmsg.type = SEND_BUFFER; 403 if (RDMA_BUF_ALLOC(conn, &rpcmsg)) { 404 p->cku_err.re_status = RPC_CANTSEND; 405 p->cku_err.re_errno = EIO; 406 RCSTAT_INCR(rcnomem); 407 cmn_err(CE_WARN, 408 "clnt_rdma_kcallit: no buffers!"); 409 goto done; 410 } 411 xdrrdma_create(xdrs, rpcmsg.addr, rpcmsg.len, 412 rdma_minchunk, NULL, XDR_ENCODE, NULL); 413 cxdrp = xdrs; 414 op = RDMA_MSG; 415 } 416 } else { 417 /* 418 * For RPCSEC_GSS since we cannot accurately presize the 419 * buffer required for encoding, we assume that its going 420 * to be a Long RPC to start with. We also create the 421 * the XDR stream with min_chunk set to 0 which instructs 422 * the XDR layer to not chunk the incoming byte stream. 423 */ 424 425 msglen += 2 * MAX_AUTH_BYTES + 2 * sizeof (struct opaque_auth); 426 msglen += xdr_sizeof(xdr_args, argsp); 427 428 /* 429 * Long RPC. Allocate one time use custom buffer. 430 */ 431 longmsg.type = CHUNK_BUFFER; 432 longmsg.addr = kmem_zalloc(msglen, KM_SLEEP); 433 cle = kmem_zalloc(sizeof (*cle), KM_SLEEP); 434 cle->c_xdroff = 0; 435 cle->c_len = longmsg.len = msglen; 436 cle->c_saddr = (uint64)(uintptr_t)longmsg.addr; 437 cle->c_next = NULL; 438 xdrrdma_create(xdrs, longmsg.addr, msglen, 0, cle, 439 XDR_ENCODE, NULL); 440 cxdrp = xdrs; 441 op = RDMA_NOMSG; 442 } 443 444 if (h->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) { 445 /* 446 * Copy in the preserialized RPC header 447 * information. 
448 */ 449 bcopy(p->cku_rpchdr, rpcmsg.addr, CKU_HDRSIZE); 450 451 /* 452 * transaction id is the 1st thing in the output 453 * buffer. 454 */ 455 /* LINTED pointer alignment */ 456 (*(uint32_t *)(rpcmsg.addr)) = p->cku_xid; 457 458 /* Skip the preserialized stuff. */ 459 XDR_SETPOS(xdrs, CKU_HDRSIZE); 460 461 /* Serialize dynamic stuff into the output buffer. */ 462 if ((!XDR_PUTINT32(xdrs, (int32_t *)&procnum)) || 463 (!AUTH_MARSHALL(h->cl_auth, xdrs, p->cku_cred)) || 464 (!(*xdr_args)(xdrs, argsp))) { 465 rdma_buf_free(conn, &rpcmsg); 466 if (cle) 467 clist_free(cle); 468 p->cku_err.re_status = RPC_CANTENCODEARGS; 469 p->cku_err.re_errno = EIO; 470 cmn_err(CE_WARN, 471 "clnt_rdma_kcallit: XDR_PUTINT32/AUTH_MARSHAL/xdr_args failed"); 472 goto done; 473 } 474 p->cku_outsz = XDR_GETPOS(xdrs); 475 } else { 476 uint32_t *uproc = (uint32_t *)&p->cku_rpchdr[CKU_HDRSIZE]; 477 IXDR_PUT_U_INT32(uproc, procnum); 478 (*(uint32_t *)(&p->cku_rpchdr[0])) = p->cku_xid; 479 XDR_SETPOS(xdrs, 0); 480 481 /* Serialize the procedure number and the arguments. */ 482 if (!AUTH_WRAP(h->cl_auth, (caddr_t)p->cku_rpchdr, 483 CKU_HDRSIZE+4, xdrs, xdr_args, argsp)) { 484 if (longmsg.addr != xdrs->x_base) { 485 longmsg.addr = xdrs->x_base; 486 longmsg.len = xdr_getbufsize(xdrs); 487 } 488 rdma_buf_free(conn, &longmsg); 489 clist_free(cle); 490 p->cku_err.re_status = RPC_CANTENCODEARGS; 491 p->cku_err.re_errno = EIO; 492 cmn_err(CE_WARN, 493 "clnt_rdma_kcallit: AUTH_WRAP failed"); 494 goto done; 495 } 496 /* 497 * If we had to allocate a new buffer while encoding 498 * then update the addr and len. 499 */ 500 if (longmsg.addr != xdrs->x_base) { 501 longmsg.addr = xdrs->x_base; 502 longmsg.len = xdr_getbufsize(xdrs); 503 } 504 505 /* 506 * If it so happens that the encoded message is after all 507 * not long enough to be a Long RPC then allocate a 508 * SEND_BUFFER and copy the encoded message into it. 
509 */ 510 p->cku_outsz = XDR_GETPOS(xdrs); 511 if (p->cku_outsz > RPC_MSG_SZ) { 512 rpcmsg.type = CHUNK_BUFFER; 513 rpcmsg.addr = longmsg.addr; 514 rpcmsg.len = longmsg.len; 515 } else { 516 clist_free(cle); 517 XDR_DESTROY(cxdrp); 518 cxdrp = NULL; 519 /* 520 * Get a pre-allocated buffer for rpc call 521 */ 522 rpcmsg.type = SEND_BUFFER; 523 if (RDMA_BUF_ALLOC(conn, &rpcmsg)) { 524 p->cku_err.re_status = RPC_CANTSEND; 525 p->cku_err.re_errno = EIO; 526 RCSTAT_INCR(rcnomem); 527 cmn_err(CE_WARN, 528 "clnt_rdma_kcallit: no buffers!"); 529 rdma_buf_free(conn, &longmsg); 530 goto done; 531 } 532 bcopy(longmsg.addr, rpcmsg.addr, p->cku_outsz); 533 xdrrdma_create(xdrs, rpcmsg.addr, p->cku_outsz, 0, 534 NULL, XDR_ENCODE, NULL); 535 cxdrp = xdrs; 536 rdma_buf_free(conn, &longmsg); 537 op = RDMA_MSG; 538 } 539 } 540 541 cl = xdrrdma_clist(xdrs); 542 543 /* 544 * Update the chunk size information for the Long RPC msg. 545 */ 546 if (cl && op == RDMA_NOMSG) 547 cl->c_len = p->cku_outsz; 548 549 /* 550 * Set up the RDMA chunk message 551 */ 552 vers = RPCRDMA_VERS; 553 clmsg.type = SEND_BUFFER; 554 if (RDMA_BUF_ALLOC(conn, &clmsg)) { 555 p->cku_err.re_status = RPC_CANTSEND; 556 p->cku_err.re_errno = EIO; 557 rdma_buf_free(conn, &rpcmsg); 558 RCSTAT_INCR(rcnomem); 559 cmn_err(CE_WARN, "clnt_rdma_kcallit: no free buffers!!"); 560 goto done; 561 } 562 xdrs = &p->cku_outxdr; 563 xdrmem_create(xdrs, clmsg.addr, clmsg.len, XDR_ENCODE); 564 /* 565 * Treat xid as opaque (xid is the first entity 566 * in the rpc rdma message). 567 */ 568 (*(uint32_t *)clmsg.addr) = p->cku_xid; 569 /* Skip xid and set the xdr position accordingly. 
*/ 570 XDR_SETPOS(xdrs, sizeof (uint32_t)); 571 (void) xdr_u_int(xdrs, &vers); 572 (void) xdr_u_int(xdrs, &op); 573 574 /* 575 * Now XDR the chunk list 576 */ 577 if (cl != NULL) { 578 579 /* 580 * Register the chunks in the list 581 */ 582 status = clist_register(conn, cl, 1); 583 if (status != RDMA_SUCCESS) { 584 cmn_err(CE_WARN, 585 "clnt_rdma_kcallit: clist register failed"); 586 rdma_buf_free(conn, &clmsg); 587 rdma_buf_free(conn, &rpcmsg); 588 clist_free(cl); 589 p->cku_err.re_status = RPC_CANTSEND; 590 p->cku_err.re_errno = EIO; 591 goto done; 592 } 593 594 } 595 (void) xdr_do_clist(xdrs, &cl); 596 597 /* 598 * Start with the RDMA header and clist (if any) 599 */ 600 sendlist = NULL; 601 clist_add(&sendlist, 0, XDR_GETPOS(xdrs), &clmsg.handle, 602 clmsg.addr, NULL, NULL); 603 604 /* 605 * Put the RPC call message in the send list if small RPC 606 */ 607 if (op == RDMA_MSG) { 608 clist_add(&sendlist, 0, p->cku_outsz, &rpcmsg.handle, 609 rpcmsg.addr, NULL, NULL); 610 } else { 611 /* Long RPC already in chunk list */ 612 RCSTAT_INCR(rclongrpcs); 613 } 614 615 /* 616 * Set up a reply buffer ready for the reply 617 */ 618 status = rdma_clnt_postrecv(conn, p->cku_xid); 619 if (status != RDMA_SUCCESS) { 620 rdma_buf_free(conn, &clmsg); 621 rdma_buf_free(conn, &rpcmsg); 622 if (cl) { 623 (void) clist_deregister(conn, cl, 1);
631 /* 632 * sync the memory for dma 633 */ 634 if (cl != NULL) { 635 status = clist_syncmem(conn, cl, 1); 636 if (status != RDMA_SUCCESS) { 637 rdma_buf_free(conn, &clmsg); 638 rdma_buf_free(conn, &rpcmsg); 639 (void) clist_deregister(conn, cl, 1); 640 clist_free(cl); 641 clist_free(sendlist); 642 p->cku_err.re_status = RPC_CANTSEND; 643 p->cku_err.re_errno = EIO; 644 goto done; 645 } 646 } 647 648 /* 649 * Send the call message to the server 650 */ 651 status = RDMA_SEND(conn, sendlist, p->cku_xid); 652 if (status != RDMA_SUCCESS) { 653 if (cl) { 654 (void) clist_deregister(conn, cl, 1); 655 clist_free(cl); 656 /* 657 * If this was a long RPC message, need 658 * to free that buffer. 659 */ 660 if (rpcmsg.type == CHUNK_BUFFER) 661 rdma_buf_free(conn, &rpcmsg); 662 } 663 clist_free(sendlist); 664 p->cku_err.re_status = RPC_CANTSEND; 665 p->cku_err.re_errno = EIO; 666 goto done; 667 } else { 668 /* 669 * RDMA plugin now owns the send msg buffers. 670 * Clear them out and don't free them here. 671 */ 672 clmsg.addr = NULL; 673 if (rpcmsg.type == SEND_BUFFER) 674 rpcmsg.addr = NULL; 675 } 676 clist_free(sendlist); 677 #ifdef DEBUG 678 if (rdma_clnt_debug) { 679 printf("clnt_rdma_kcallit: send request xid %u\n", p->cku_xid); 680 } 681 #endif 682 683 /* 684 * Recv rpc reply 685 */ 686 status = RDMA_RECV(conn, &recvlist, p->cku_xid); 687 688 /* 689 * Deregister chunks sent. Do this only after the reply 690 * is received as that is a sure indication that the 691 * remote end has completed RDMA of the chunks. 
692 */ 693 if (cl != NULL) { 694 /* 695 * Deregister the chunks 696 */ 697 (void) clist_deregister(conn, cl, 1); 698 clist_free(cl); 699 /* 700 * If long RPC free chunk 701 */ 702 rdma_buf_free(conn, &rpcmsg); 703 } 704 705 /* 706 * Now check recv status 707 */ 708 if (status != 0) { 709 #ifdef DEBUG 710 if (rdma_clnt_debug) 711 cmn_err(CE_NOTE, 712 "clnt_rdma_kcallit: reply failed %u status %d", 713 p->cku_xid, status); 714 #endif 715 if (status == RDMA_INTR) { 716 p->cku_err.re_status = RPC_INTR; 717 p->cku_err.re_errno = EINTR; 718 RCSTAT_INCR(rcintrs); 719 } else if (status == RPC_TIMEDOUT) { 720 p->cku_err.re_status = RPC_TIMEDOUT; 721 p->cku_err.re_errno = ETIMEDOUT; 722 RCSTAT_INCR(rctimeouts); 723 } else { 724 p->cku_err.re_status = RPC_CANTRECV; 725 p->cku_err.re_errno = EIO; 726 } 727 goto done; 728 } 729 #ifdef DEBUG 730 if (rdma_clnt_debug) 731 printf("clnt_rdma_kcallit: got response xid %u\n", p->cku_xid); 732 #endif 733 /* 734 * Process the reply message. 735 * 736 * First the chunk list (if any) 737 */ 738 xdrs = &(p->cku_inxdr); 739 xdrmem_create(xdrs, (caddr_t)(uintptr_t)recvlist->c_saddr, 740 recvlist->c_len, XDR_DECODE); 741 /* 742 * Treat xid as opaque (xid is the first entity 743 * in the rpc rdma message). 744 */ 745 xid = *(uint32_t *)(uintptr_t)recvlist->c_saddr; 746 /* Skip xid and set the xdr position accordingly. */ 747 XDR_SETPOS(xdrs, sizeof (uint32_t)); 748 (void) xdr_u_int(xdrs, &vers); 749 (void) xdr_u_int(xdrs, &op); 750 (void) xdr_do_clist(xdrs, &cl); 751 off = xdr_getpos(xdrs); 752 753 /* 754 * Now the RPC reply message itself. If the reply 755 * came as a chunk item, then RDMA the reply over. 
756 */ 757 xdrs = &replxdr; 758 if (cl && op == RDMA_NOMSG) { 759 struct clist *cle = cl; 760 761 rpcreply.type = CHUNK_BUFFER; 762 rpcreply.addr = kmem_alloc(cle->c_len, KM_SLEEP); 763 rpcreply.len = cle->c_len; 764 cle->c_daddr = (uint64)(uintptr_t)rpcreply.addr; 765 cl = cl->c_next; 766 cle->c_next = NULL; 767 768 /* 769 * Register the rpc reply chunk destination 770 */ 771 status = clist_register(conn, cle, 0); 772 if (status) { 773 rdma_buf_free(conn, &rpcreply); 774 clist_free(cle); 775 p->cku_err.re_status = RPC_CANTDECODERES; 776 p->cku_err.re_errno = EIO; 777 cmn_err(CE_WARN, 778 "clnt_rdma_kcallit: clist_register failed"); 779 goto rdma_done; 780 } 781 782 /* 783 * Now read rpc reply in 784 */ 785 #ifdef DEBUG 786 if (rdma_clnt_debug) 787 printf("clnt_rdma_kcallit: read chunk, len %d, xid %u, \ 788 reply xid %u\n", cle->c_len, p->cku_xid, xid); 789 #endif 790 status = RDMA_READ(conn, cle, WAIT); 791 if (status) { 792 (void) clist_deregister(conn, cle, 0); 793 rdma_buf_free(conn, &rpcreply); 794 clist_free(cle); 795 p->cku_err.re_status = RPC_CANTDECODERES; 796 p->cku_err.re_errno = EIO; 797 cmn_err(CE_WARN, 798 "clnt_rdma_kcallit: RDMA_READ failed"); 799 goto rdma_done; 800 } 801 802 /* 803 * sync the memory for dma 804 */ 805 status = clist_syncmem(conn, cle, 0); 806 if (status != RDMA_SUCCESS) { 807 (void) clist_deregister(conn, cle, 0); 808 rdma_buf_free(conn, &rpcreply); 809 clist_free(cle); 810 p->cku_err.re_status = RPC_CANTDECODERES; 811 p->cku_err.re_errno = EIO; 812 goto rdma_done; 813 } 814 815 /* 816 * Deregister the Long RPC chunk 817 */ 818 (void) clist_deregister(conn, cle, 0); 819 clist_free(cle); 820 xdrrdma_create(xdrs, rpcreply.addr, rpcreply.len, 0, cl, 821 XDR_DECODE, conn); 822 rxdrp = xdrs; 823 } else { 824 rpcreply.addr = NULL; 825 xdrrdma_create(xdrs, 826 (caddr_t)(uintptr_t)(recvlist->c_saddr + off), 827 recvlist->c_len - off, 0, cl, XDR_DECODE, conn); 828 rxdrp = xdrs; 829 } 830 831 reply_msg.rm_direction = REPLY; 832 
reply_msg.rm_reply.rp_stat = MSG_ACCEPTED; 833 reply_msg.acpted_rply.ar_stat = SUCCESS; 834 reply_msg.acpted_rply.ar_verf = _null_auth; 835 /* 836 * xdr_results will be done in AUTH_UNWRAP. 837 */ 838 reply_msg.acpted_rply.ar_results.where = NULL; 839 reply_msg.acpted_rply.ar_results.proc = xdr_void; 840 841 /* 842 * Decode and validate the response. 843 */ 844 if (xdr_replymsg(xdrs, &reply_msg)) { 845 enum clnt_stat re_status; 846 847 _seterr_reply(&reply_msg, &(p->cku_err)); 848 849 re_status = p->cku_err.re_status; 850 if (re_status == RPC_SUCCESS) {
895 break; 896 } 897 RPCLOG(1, "clnt_rdma_kcallit : " 898 "authentication failed with " 899 "RPC_AUTHERROR of type %d\n", 900 p->cku_err.re_why); 901 } 902 cmn_err(CE_WARN, 903 "clnt_rdma_kcallit: RPC failed"); 904 905 } 906 } else { 907 p->cku_err.re_status = RPC_CANTDECODERES; 908 p->cku_err.re_errno = EIO; 909 cmn_err(CE_WARN, "clnt_rdma_kcallit: xdr_replymsg failed"); 910 } 911 912 /* 913 * If rpc reply is in a chunk, free it now. 914 */ 915 if (rpcreply.addr != NULL) 916 rdma_buf_free(conn, &rpcreply); 917 918 rdma_done: 919 if ((cl != NULL) || (op == RDMA_NOMSG)) { 920 rdma_buf_t donemsg; 921 922 /* 923 * Free the list holding the chunk info 924 */ 925 if (cl) { 926 clist_free(cl); 927 cl = NULL; 928 } 929 930 /* 931 * Tell the server that the reads are done 932 */ 933 donemsg.type = SEND_BUFFER; 934 if (RDMA_BUF_ALLOC(conn, &donemsg)) { 935 p->cku_err.re_status = RPC_CANTSEND; 936 p->cku_err.re_errno = EIO; 937 RCSTAT_INCR(rcnomem); 938 cmn_err(CE_WARN, "clnt_rdma_kcallit: no free buffer"); 939 goto done; 940 } 941 xdrs = &p->cku_outxdr; 942 xdrmem_create(xdrs, donemsg.addr, donemsg.len, XDR_ENCODE); 943 vers = RPCRDMA_VERS; 944 op = RDMA_DONE; 945 946 /* 947 * Treat xid as opaque (xid is the first entity 948 * in the rpc rdma message). 949 */ 950 (*(uint32_t *)donemsg.addr) = p->cku_xid; 951 /* Skip xid and set the xdr position accordingly. 
*/
		XDR_SETPOS(xdrs, sizeof (uint32_t));
		if (!xdr_u_int(xdrs, &vers) ||
		    !xdr_u_int(xdrs, &op)) {
			cmn_err(CE_WARN,
			    "clnt_rdma_kcallit: xdr_u_int failed");
			rdma_buf_free(conn, &donemsg);
			goto done;
		}

		/* Send the RDMA_DONE message by itself on the send list. */
		sendlist = NULL;
		clist_add(&sendlist, 0, XDR_GETPOS(xdrs), &donemsg.handle,
		    donemsg.addr, NULL, NULL);

		status = RDMA_SEND(conn, sendlist, p->cku_xid);
		if (status != RDMA_SUCCESS) {
			cmn_err(CE_WARN,
			    "clnt_rdma_kcallit: RDMA_SEND failed xid %u",
			    p->cku_xid);
		}
#ifdef DEBUG
		else {
			if (rdma_clnt_debug)
				printf("clnt_rdma_kcallit: sent RDMA_DONE xid %u\n",
				    p->cku_xid);
		}
#endif
		clist_free(sendlist);
	}

done:
	/* Tear down whichever XDR streams were created on this call path. */
	if (cxdrp)
		XDR_DESTROY(cxdrp);
	if (rxdrp) {
		(void) xdr_rpc_free_verifier(rxdrp, &reply_msg);
		XDR_DESTROY(rxdrp);
	}

	/* Return the receive buffer that RDMA_RECV handed back, if any. */
	if (recvlist) {
		rdma_buf_t recvmsg;

		recvmsg.addr = (caddr_t)(uintptr_t)recvlist->c_saddr;
		recvmsg.type = RECV_BUFFER;
		RDMA_BUF_FREE(conn, &recvmsg);
		clist_free(recvlist);
	}
	RDMA_REL_CONN(conn);
	if (p->cku_err.re_status != RPC_SUCCESS) {
		RCSTAT_INCR(rcbadcalls);
	}
	return (p->cku_err.re_status);
}

/*
 * Abort a call in progress: intentionally a no-op for the RDMA
 * transport (there is no mechanism here to cancel an in-flight send).
 */
/* ARGSUSED */
static void
clnt_rdma_kabort(CLIENT *h)
{
}

/*
 * Copy the error status recorded by the last call on this handle
 * out to the caller.
 */
static void
clnt_rdma_kerror(CLIENT *h, struct rpc_err *err)
{
	struct cku_private *p = htop(h);

	*err = p->cku_err;
}

static bool_t
clnt_rdma_kfreeres(CLIENT *h, xdrproc_t xdr_res, caddr_t res_ptr)
{
	struct cku_private *p = htop(h);
	XDR *xdrs;
/*
 * Retry timers are not used by the RDMA transport; just count the
 * request and report success.
 */
/* ARGSUSED */
static int
clnt_rdma_ksettimers(CLIENT *h, struct rpc_timers *t, struct rpc_timers *all,
	int minimum, void (*feedback)(int, int, caddr_t), caddr_t arg,
	uint32_t xid)
{
	RCSTAT_INCR(rctimers);
	return (0);
}

/*
 * Determine whether the given address is reachable over an RDMA
 * transport; on success build and return a knetconfig describing the
 * RDMA plugin that can reach it.  Returns 0 if reachable, -1 otherwise.
 * Only valid from the global zone.
 */
int
rdma_reachable(int addr_type, struct netbuf *addr, struct knetconfig **knconf)
{
	rdma_registry_t *rp;
	void *handle = NULL;
	struct knetconfig *knc;
	char *pf, *p;
	rdma_stat status;
	int error = 0;

	if (!INGLOBALZONE(curproc))
		return (-1);
	/*
	 * modload the RDMA plugins if not already done.
	 */
	if (!rdma_modloaded) {
		mutex_enter(&rdma_modload_lock);
		if (!rdma_modloaded) {
			error = rdma_modload();
		}
		mutex_exit(&rdma_modload_lock);
		if (error)
			return (-1);
	}

	if (!rdma_dev_available)
		return (-1);

	/* Walk the registry of loaded RDMA plugins under the reader lock. */
	rw_enter(&rdma_lock, RW_READER);
	rp = rdma_mod_head;
		/*
		 * NOTE(review): the enclosing registry-walk loop and the
		 * allocation of knc/pf/p occur in a span not visible here;
		 * brace nesting below reconstructed to match the visible
		 * closings — confirm against the full file.
		 */
		if (addr_type == AF_INET)
			(void) strncpy(pf, NC_INET, KNC_STRSIZE);
		else if (addr_type == AF_INET6)
			(void) strncpy(pf, NC_INET6, KNC_STRSIZE);
		pf[KNC_STRSIZE - 1] = '\0';

		(void) strncpy(p, rp->r_mod->rdma_api, KNC_STRSIZE);
		p[KNC_STRSIZE - 1] = '\0';

		/* Hand the populated knetconfig back to the caller. */
		knc->knc_protofmly = pf;
		knc->knc_proto = p;
		knc->knc_rdev = (dev_t)handle;
		*knconf = knc;
		rw_exit(&rdma_lock);
		return (0);
	}
	rp = rp->r_next;
	}
	rw_exit(&rdma_lock);
	return (-1);
}


14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 /* 29 * Portions of this source code were derived from Berkeley 30 * 4.3 BSD under license from the Regents of the University of 31 * California. 32 */ 33 34 /* Copyright (c) 2006, The Ohio State University. All rights reserved. 35 * 36 * Portions of this source code is developed by the team members of 37 * The Ohio State University's Network-Based Computing Laboratory (NBCL), 38 * headed by Professor Dhabaleswar K. (DK) Panda. 
39 * 40 * Acknowledgements to contributions from developors: 41 * Ranjit Noronha: noronha@cse.ohio-state.edu 42 * Lei Chai : chail@cse.ohio-state.edu 43 * Weikuan Yu : yuw@cse.ohio-state.edu 44 * 45 */ 46 47 #pragma ident "@(#)clnt_rdma.c 1.10 05/07/26 SMI" 48 49 #include <sys/param.h> 50 #include <sys/types.h> 51 #include <sys/user.h> 52 #include <sys/systm.h> 53 #include <sys/sysmacros.h> 54 #include <sys/errno.h> 55 #include <sys/kmem.h> 56 #include <sys/debug.h> 57 #include <sys/systm.h> 58 #include <sys/kstat.h> 59 #include <sys/t_lock.h> 60 #include <sys/ddi.h> 61 #include <sys/cmn_err.h> 62 #include <sys/time.h> 63 #include <sys/isa_defs.h> 64 #include <sys/zone.h> 65 66 #include <rpc/types.h> 67 #include <rpc/xdr.h> 68 #include <rpc/auth.h> 69 #include <rpc/clnt.h> 70 #include <rpc/rpc_msg.h> 71 #include <rpc/rpc_rdma.h> 72 #include <nfs/nfs.h> 73 74 #define CLNT_CREDIT_LOW (5) 75 76 xdrproc_t x_READ3args = NULL_xdrproc_t; 77 xdrproc_t x_READ3res = NULL_xdrproc_t; 78 xdrproc_t x_READ3vres = NULL_xdrproc_t; 79 xdrproc_t x_READ3uiores = NULL_xdrproc_t; 80 81 static uint32_t rdma_bufs_rqst = RDMA_BUFS_RQST; 82 83 int rdma_wlist_verbose_debug = 0; 84 int rdma_wlist_memreg_debug = 0; 85 int rdma_wlist_clnt_debug = 0; 86 int rdma_wlist_svc_debug = 0; 87 int rdma_wlist_xdr_debug = 0; 88 int rdma_wlist_pglck_debug = 0; 89 int credit_control_debug = 0; 90 int rdma_long_reply_debug = 0; 91 int rdma_xdr_long_reply_debug = 0; 92 93 struct clist empty_cl = {0}; 94 95 static void clnt_read3args_make_wlist(caddr_t, struct clist **, xdrproc_t, uint_t *); 96 static int clnt_compose_rpcmsg(CLIENT *, rpcproc_t, rdma_buf_t *, 97 XDR *, xdrproc_t, caddr_t); 98 static int clnt_compose_rdma_header(CONN *, CLIENT *, rdma_buf_t *, 99 XDR **, uint_t *); 100 static int clnt_setup_rlist(CONN *, XDR *, struct clist **); 101 static int clnt_setup_wlist(CONN *, rpcproc_t, struct clist **, 102 caddr_t, xdrproc_t, XDR *); 103 static int clnt_setup_long_reply(CONN *, rpcproc_t, struct clist 
*, 104 XDR *, bool_t *); 105 #ifdef DYNAMIC_CREDIT_CONTROL 106 static void clnt_compute_credit(CONN *, uint32_t *); 107 #endif 108 static void clnt_check_credit(CONN *); 109 static void clnt_return_credit(CONN *); 110 static int clnt_decode_long_reply(CONN *, rpcproc_t, struct clist *, 111 struct clist *, XDR *, XDR **, struct clist *, 112 struct clist *, uint_t,uint_t); 113 114 static void clnt_update_credit(CONN *,uint32_t); 115 static void check_dereg_wlist(CONN *, struct clist *); 116 117 static enum clnt_stat clnt_rdma_kcallit(CLIENT *, rpcproc_t, xdrproc_t, 118 caddr_t, xdrproc_t, caddr_t, struct timeval); 119 static void clnt_rdma_kabort(CLIENT *); 120 static void clnt_rdma_kerror(CLIENT *, struct rpc_err *); 121 static bool_t clnt_rdma_kfreeres(CLIENT *, xdrproc_t, caddr_t); 122 static void clnt_rdma_kdestroy(CLIENT *); 123 static bool_t clnt_rdma_kcontrol(CLIENT *, int, char *); 124 static int clnt_rdma_ksettimers(CLIENT *, struct rpc_timers *, 125 struct rpc_timers *, int, void(*)(int, int, caddr_t), caddr_t, uint32_t); 126 127 /* 128 * Operations vector for RDMA based RPC 129 */ 130 static struct clnt_ops rdma_clnt_ops = { 131 clnt_rdma_kcallit, /* do rpc call */ 132 clnt_rdma_kabort, /* abort call */ 133 clnt_rdma_kerror, /* return error status */ 134 clnt_rdma_kfreeres, /* free results */ 135 clnt_rdma_kdestroy, /* destroy rpc handle */ 136 clnt_rdma_kcontrol, /* the ioctl() of rpc */ 137 clnt_rdma_ksettimers, /* set retry timers */ 138 }; 139 140 /* 141 * The size of the preserialized RPC header information. 
*/
#define	CKU_HDRSIZE	20
/*
 * Internal (non-wire) status codes returned by the clnt_compose_* and
 * clnt_setup_* helper routines below.
 */
#define	CLNT_RDMA_SUCCESS 0
#define	CLNT_RDMA_FAIL	-99

/*
 * Per RPC RDMA endpoint details.
 * Embedded in (and recovered from) each CLIENT handle via htop();
 * cku_client must therefore remain the first member.
 */
typedef struct cku_private {
	CLIENT			cku_client;	/* client handle */
	rdma_mod_t		*cku_rd_mod;	/* underlying RDMA mod */
	void			*cku_rd_handle;	/* underlying RDMA device */
	struct netbuf		cku_addr;	/* remote netbuf address */
	int			cku_addrfmly;	/* for finding addr_type */
	struct rpc_err		cku_err;	/* error status */
	struct cred		*cku_cred;	/* credentials */
	XDR			cku_outxdr;	/* xdr stream for output */
	uint32_t		cku_outsz;	/* size of encoded call msg */
	XDR			cku_inxdr;	/* xdr stream for input */
	char			cku_rpchdr[CKU_HDRSIZE+4]; /* rpc header */
	uint32_t		cku_xid;	/* current XID */
} cku_private_t;

#define	CLNT_RDMA_DELAY	10	/* secs to delay after a connection failure */
	rw_exit(&rdma_lock);

	/*
	 * Set up the rpc information
	 */
	p->cku_cred = cred;
	p->cku_xid = 0;

	/* Grow the cached address buffer if the new address won't fit. */
	if (p->cku_addr.maxlen < raddr->len) {
		if (p->cku_addr.maxlen != 0 && p->cku_addr.buf != NULL)
			kmem_free(p->cku_addr.buf, p->cku_addr.maxlen);
		p->cku_addr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP);
		p->cku_addr.maxlen = raddr->maxlen;
	}

	p->cku_addr.len = raddr->len;
	bcopy(raddr->buf, p->cku_addr.buf, raddr->len);
	h->cl_ops = &rdma_clnt_ops;
}

/*
 * Serialize the RPC call message (header, credentials and arguments)
 * into the buffer described by rpcmsg via the supplied XDR stream.
 * Records the encoded size in p->cku_outsz.  Returns CLNT_RDMA_SUCCESS
 * or CLNT_RDMA_FAIL if any serialization step fails.
 */
static int clnt_compose_rpcmsg(CLIENT *h, rpcproc_t procnum,
	rdma_buf_t *rpcmsg, XDR *xdrs,
	xdrproc_t xdr_args, caddr_t argsp)
{
	cku_private_t *p = htop(h);

	if (h->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
		/*
		 * Copy in the preserialized RPC header
		 * information.
		 */
		bcopy(p->cku_rpchdr, rpcmsg->addr, CKU_HDRSIZE);

		/*
		 * transaction id is the 1st thing in the output
		 * buffer.
		 */
		/* LINTED pointer alignment */
		(*(uint32_t *)(rpcmsg->addr)) = p->cku_xid;

		/* Skip the preserialized stuff. */
		XDR_SETPOS(xdrs, CKU_HDRSIZE);

		/* Serialize dynamic stuff into the output buffer. */
		if ((!XDR_PUTINT32(xdrs, (int32_t *)&procnum)) ||
		    (!AUTH_MARSHALL(h->cl_auth, xdrs, p->cku_cred)) ||
		    (!(*xdr_args)(xdrs, argsp))) {
			cmn_err(CE_WARN,"Failed to serialize dynamic arguments\n");
			return CLNT_RDMA_FAIL;
		}
		p->cku_outsz = XDR_GETPOS(xdrs);
	} else {
		/*
		 * RPCSEC_GSS: the header (including procnum and xid) is
		 * wrapped by AUTH_WRAP rather than copied in directly.
		 */
		uint32_t *uproc = (uint32_t *)&p->cku_rpchdr[CKU_HDRSIZE];
		IXDR_PUT_U_INT32(uproc, procnum);
		(*(uint32_t *)(&p->cku_rpchdr[0])) = p->cku_xid;
		XDR_SETPOS(xdrs, 0);

		/* Serialize the procedure number and the arguments.
*/
		if (!AUTH_WRAP(h->cl_auth, (caddr_t)p->cku_rpchdr,
		    CKU_HDRSIZE+4, xdrs, NULL, NULL) ||
		    !(*xdr_args)(xdrs, argsp)) {
			/* Keep rpcmsg in sync so the caller frees the right buffer. */
			if (rpcmsg->addr != xdrs->x_base) {
				rpcmsg->addr = xdrs->x_base;
				rpcmsg->len = xdr_getbufsize(xdrs);
			}
			cmn_err(CE_WARN,"Failed to serialize procedure number and the arguments.\n");
			return CLNT_RDMA_FAIL;
		}
		/*
		 * If we had to allocate a new buffer while encoding
		 * then update the addr and len.
		 */
		if (rpcmsg->addr != xdrs->x_base) {
			rpcmsg->addr = xdrs->x_base;
			rpcmsg->len = xdr_getbufsize(xdrs);
		}

		p->cku_outsz = XDR_GETPOS(xdrs);
	}

	return CLNT_RDMA_SUCCESS;
}

/*
 * Build the RPC-over-RDMA transport header (xid, protocol version,
 * credit request, message type) into a newly allocated SEND buffer
 * and leave *xdrs positioned just past it.  Returns CLNT_RDMA_FAIL
 * if no send buffer is available.
 */
static int clnt_compose_rdma_header(CONN *conn, CLIENT *h, rdma_buf_t *clmsg,
	XDR **xdrs, uint_t *op)
{
	cku_private_t *p = htop(h);
	uint_t vers;
	uint32_t rdma_credit = rdma_bufs_rqst;

	vers = RPCRDMA_VERS;
	clmsg->type = SEND_BUFFER;

#ifdef DYNAMIC_CREDIT_CONTROL
	clnt_compute_credit(conn, &rdma_credit);
#endif

	if (RDMA_BUF_ALLOC(conn, clmsg)) {
		return CLNT_RDMA_FAIL;
	}

	*xdrs = &p->cku_outxdr;
	xdrmem_create(*xdrs, clmsg->addr, clmsg->len, XDR_ENCODE);

	/* xid is written as opaque: first word of the rdma message. */
	(*(uint32_t *)clmsg->addr) = p->cku_xid;
	XDR_SETPOS(*xdrs, sizeof (uint32_t));
	(void) xdr_u_int(*xdrs, &vers);
	(void) xdr_u_int(*xdrs, &rdma_credit);
	(void) xdr_u_int(*xdrs, op);

	return CLNT_RDMA_SUCCESS;
}

/*
 * Register the chunk list (if any) with the connection and encode it
 * into the transport header stream.  Returns CLNT_RDMA_FAIL if
 * registration fails.
 */
static int clnt_setup_rlist(CONN *conn, XDR *xdrs, struct clist **cl)
{
	int ret;

	if (*cl != NULL) {
		ret = clist_register(conn, *cl, 1);
		if (ret != RDMA_SUCCESS) {
			return CLNT_RDMA_FAIL;
		}
	}
	(void) xdr_do_clist(xdrs, cl);

	return CLNT_RDMA_SUCCESS;
}

/*
 * Build, register and encode the write list describing where the
 * server should RDMA-write the reply data (NFSv3 READ only); for all
 * other procedures an empty write list is encoded.
 */
static int clnt_setup_wlist(CONN *conn, rpcproc_t procnum,
	struct clist **rpccall_wlist, caddr_t resultsp,
	xdrproc_t xdr_results, XDR *xdrs)
{
	int status;
	uint_t num_segment = 0;

	if (procnum ==
NFSPROC3_READ) { 462 clnt_read3args_make_wlist(resultsp, rpccall_wlist, 463 xdr_results, &num_segment); 464 status = clist_register(conn, *rpccall_wlist, 0); 465 if (status != RDMA_SUCCESS) 466 return CLNT_RDMA_FAIL; 467 } else { 468 *rpccall_wlist = NULL; 469 } 470 471 if (! xdr_encode_wlist(xdrs, *rpccall_wlist, num_segment)) 472 return CLNT_RDMA_FAIL; 473 474 return CLNT_RDMA_SUCCESS; 475 } 476 477 static int clnt_setup_long_reply(CONN *conn, rpcproc_t procnum, 478 struct clist *lrc_clist, 479 XDR *xdrs, bool_t *exists) 480 { 481 int status; 482 caddr_t addr; 483 #ifdef SERVER_REG_CACHE 484 rib_lrc_entry_t *long_reply_buf = NULL; 485 #endif 486 *exists = FALSE; 487 lrc_clist->c_daddr = NULL; 488 489 #ifdef RPC_RDMA_INLINE 490 if (lrc_clist->c_len < rdma_minchunk) 491 return CLNT_RDMA_SUCCESS; 492 #endif 493 494 if (procnum == NFSPROC3_READDIR || 495 procnum == NFSPROC3_READDIRPLUS || 496 procnum == NFSPROC3_READLINK) { 497 #ifndef SERVER_REG_CACHE 498 addr = kmem_alloc(LONG_REPLY_LEN, KM_SLEEP); 499 bzero(addr, LONG_REPLY_LEN); 500 lrc_clist->c_daddr = (uint64)addr; 501 lrc_clist->c_len = LONG_REPLY_LEN; 502 lrc_clist->c_next = NULL; 503 lrc_clist->long_reply_buf = NULL; 504 status = clist_register(conn, lrc_clist, 0); 505 #else 506 long_reply_buf = RDMA_GET_SERVER_CACHE_BUF(conn, LONG_REPLY_LEN); 507 bzero(long_reply_buf->lrc_buf, LONG_REPLY_LEN); 508 lrc_clist->c_daddr = (uint64)long_reply_buf->lrc_buf; 509 lrc_clist->c_len = LONG_REPLY_LEN; 510 lrc_clist->c_next = NULL; 511 lrc_clist->long_reply_buf = (uint64)long_reply_buf; 512 lrc_clist->c_dmemhandle = long_reply_buf->lrc_mhandle; 513 status = clist_register(conn, lrc_clist, 0); 514 #endif 515 if(status) { 516 cmn_err(CE_WARN, "clnt_setup_long_reply: cannot register buffer"); 517 #ifndef SERVER_REG_CACHE 518 kmem_free((void*)addr, (size_t)LONG_REPLY_LEN); 519 #else 520 RDMA_FREE_SERVER_CACHE_BUF(conn, (rib_lrc_entry_t *)long_reply_buf); 521 522 #endif 523 lrc_clist->c_daddr = NULL; 524 return 
CLNT_RDMA_FAIL; 525 } 526 *exists = TRUE; 527 } 528 529 return CLNT_RDMA_SUCCESS; 530 } 531 532 static void 533 clnt_read3args_make_wlist(caddr_t replyp, struct clist **rpccall_wlist, 534 xdrproc_t xr, uint_t *num_segment) 535 { 536 READ3uiores *ures = (READ3uiores *)replyp; 537 READ3vres *vres = (READ3vres *)replyp; 538 struct clist *rwl = NULL, *prev = NULL; 539 int i, total_length; 540 541 *rpccall_wlist = NULL; 542 543 #ifdef RPC_RDMA_INLINE 544 if (xr == x_READ3uiores) { 545 total_length = 0; 546 for(i=0; i<ures->uiop->uio_iovcnt; i++) { 547 total_length += ures->uiop->uio_iov[i].iov_len; 548 } 549 } else { 550 total_length = vres->data.data_len; 551 } 552 553 if (total_length < rdma_minchunk) 554 return; 555 #endif 556 557 /* XXX: fake a chunk threshold for the combined length for now */ 558 if (xr == x_READ3uiores) { 559 *num_segment = ures->uiop->uio_iovcnt; 560 for(i=0; i<ures->uiop->uio_iovcnt; i++) { 561 rwl = (struct clist *)kmem_zalloc(sizeof(struct clist), 562 KM_SLEEP); 563 564 rwl->c_len = ures->uiop->uio_iov[i].iov_len; 565 rwl->c_daddr = (uint64)(ures->uiop->uio_iov[i].iov_base); 566 /* 567 * if userspace address, put adspace ptr in clist. 
568 * If not, then do nothing since it's already 569 * set to NULL (from empty_cl) 570 */ 571 if (ures->uiop->uio_segflg == UIO_USERSPACE) { 572 int error; 573 rwl->c_adspc = ttoproc(curthread)->p_as; 574 } else { 575 rwl->c_dpplist = (page_t **)NULL; 576 } 577 578 if(prev == NULL) 579 prev = rwl; 580 else { 581 prev->c_next = rwl; 582 prev = rwl; 583 } 584 585 if(*rpccall_wlist == NULL) 586 *rpccall_wlist = rwl; 587 } 588 rwl->c_next = NULL; 589 } else if (xr == x_READ3vres) { 590 *num_segment = 1; 591 rwl = (struct clist *)kmem_zalloc(sizeof (struct clist), 592 KM_SLEEP); 593 *rwl = empty_cl; 594 595 rwl->c_len = vres->data.data_len; 596 rwl->c_daddr = (uint64)(vres->data.data_val); 597 598 if(*rpccall_wlist == NULL) 599 *rpccall_wlist = rwl; 600 } else { 601 /*cmn_err(CE_NOTE, "read3args_make_wlist: non READ3xr=%p", 602 (void *)xr);*/ 603 } 604 } 605 606 /* ARGSUSED */ 607 static enum clnt_stat 608 clnt_rdma_kcallit(CLIENT *h, rpcproc_t procnum, xdrproc_t xdr_args, 609 caddr_t argsp, xdrproc_t xdr_results, caddr_t resultsp, struct timeval wait) 610 { 611 cku_private_t *p = htop(h); 612 int status; 613 XDR *xdrs; 614 XDR *cxdrp = NULL, callxdr; /* for xdrrdma encoding the RPC call */ 615 XDR *rxdrp = NULL, replxdr; /* for xdrrdma decoding the RPC reply */ 616 struct rpc_msg reply_msg; 617 struct clist *sendlist = NULL, *recvlist = NULL; 618 struct clist *cl = NULL, *cle = NULL, *rdma_reply = NULL; 619 uint_t vers, op; 620 uint_t off; 621 uint32_t xid; 622 uint32_t seg_array_len; 623 CONN *conn = NULL; 624 rdma_buf_t clmsg = {0}, rpcmsg = {0}; 625 int msglen; 626 clock_t ticks; 627 bool_t wlist_exists_reply = FALSE; 628 bool_t long_reply_buf_exists = FALSE; 629 630 struct clist *rpccall_wlist = NULL, *rpcreply_wlist = NULL, 631 long_reply_clist ={0}; 632 rpccall_read_t read_type; 633 rpccall_write_t write_type; 634 uint32_t rdma_credit = rdma_bufs_rqst; 635 struct clist long_reply_buf_clist = {0}; 636 637 RCSTAT_INCR(rccalls); 638 /* 639 * Get unique xid 640 */ 
641 if (p->cku_xid == 0) 642 p->cku_xid = alloc_xid(); 643 644 status = RDMA_GET_CONN(p->cku_rd_mod->rdma_ops, &p->cku_addr, 645 p->cku_addrfmly, p->cku_rd_handle, &conn); 646 647 if (conn == NULL) { 648 /* 649 * Connect failed to server. Could be because of one 650 * of several things. In some cases we don't want 651 * the caller to retry immediately - delay before 652 * returning to caller. 653 */ 654 switch (status) { 655 case RDMA_TIMEDOUT: 656 /*
674 * down or temporary resource failure. Delay before 675 * returning to caller. 676 */ 677 ticks = clnt_rdma_min_delay * drv_usectohz(1000000); 678 p->cku_err.re_status = RPC_CANTCONNECT; 679 p->cku_err.re_errno = EIO; 680 681 if (h->cl_nosignal == TRUE) { 682 delay(ticks); 683 } else { 684 if (delay_sig(ticks) == EINTR) { 685 p->cku_err.re_status = RPC_INTR; 686 p->cku_err.re_errno = EINTR; 687 } 688 } 689 break; 690 } 691 692 return (p->cku_err.re_status); 693 } 694 695 clnt_check_credit(conn); 696 697 /* 698 * Get the size of the rpc call message. Need this 699 * to determine if the rpc call message will fit in 700 * the pre-allocated RDMA buffers. If the rpc call 701 * message length is greater that the pre-allocated 702 * buffers then, it is a Long RPC. A one time use 703 * buffer is allocated and registered for the Long 704 * RPC call. 705 */ 706 xdrs = &callxdr; 707 msglen = CKU_HDRSIZE + BYTES_PER_XDR_UNIT; 708 709 if (h->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) { 710 msglen += xdrrdma_authsize(h->cl_auth, p->cku_cred, 711 rdma_minchunk); 712 msglen += xdrrdma_sizeof(xdr_args, argsp, rdma_minchunk); 713 714 if (msglen > RPC_MSG_SZ) 715 read_type = RPCCALL_RCHUNK; 716 else 717 read_type = RPCCALL_NORCHUNK; 718 } else { 719 /* 720 * For RPCSEC_GSS since we cannot accurately presize the 721 * buffer required for encoding, we assume that its going 722 * to be a Long RPC to start with. We also create the 723 * the XDR stream with min_chunk set to 0 which instructs 724 * the XDR layer to not chunk the incoming byte stream. 
725 */ 726 727 msglen += 2 * MAX_AUTH_BYTES + 2 * sizeof (struct opaque_auth); 728 msglen += xdrrdma_sizeof(xdr_args, argsp, rdma_minchunk); 729 730 if (msglen > RPC_MSG_SZ) 731 read_type = RPCCALL_RCHUNK; 732 else 733 read_type = RPCCALL_NORCHUNK; 734 } 735 736 if (read_type == RPCCALL_NORCHUNK) { 737 738 rpcmsg.type = SEND_BUFFER; 739 if (RDMA_BUF_ALLOC(conn, &rpcmsg)) { 740 cmn_err(CE_WARN, "clnt_rdma_kcallit: no buffers!"); 741 goto done; 742 } 743 } else { 744 #ifdef SERVER_REG_CACHE 745 rib_lrc_entry_t *long_reply_buf = NULL; 746 #endif 747 rpcmsg.type = CHUNK_BUFFER; 748 #ifdef SERVER_REG_CACHE 749 long_reply_buf = RDMA_GET_SERVER_CACHE_BUF(conn, msglen); 750 rpcmsg.addr = long_reply_buf->lrc_buf; 751 #else 752 rpcmsg.addr = kmem_zalloc(msglen, KM_SLEEP); 753 #endif 754 cle = (struct clist *)kmem_zalloc(sizeof (struct clist), 755 KM_SLEEP); 756 cle->c_xdroff = 0; 757 cle->c_len = rpcmsg.len = msglen; 758 cle->c_saddr = (uint64)(uintptr_t)rpcmsg.addr; 759 cle->c_next = NULL; 760 #ifdef SERVER_REG_CACHE 761 cle->long_reply_buf = (uint64)long_reply_buf; 762 #endif 763 } 764 765 op = cle ? RDMA_NOMSG : RDMA_MSG; 766 cxdrp = xdrs; 767 xdrrdma_create(xdrs, rpcmsg.addr, (cle ? msglen : rpcmsg.len), 768 rdma_minchunk, cle, XDR_ENCODE, NULL); 769 770 status = clnt_compose_rpcmsg(h, procnum, &rpcmsg, xdrs, xdr_args, argsp); 771 if (status != CLNT_RDMA_SUCCESS) { 772 rdma_buf_free(conn, &rpcmsg); 773 clist_free(cle); 774 p->cku_err.re_status = RPC_CANTENCODEARGS; 775 p->cku_err.re_errno = EIO; 776 cmn_err(CE_WARN, 777 "clnt_rdma_kcallit: clnt_compose_rpcmsg failed"); 778 goto done; 779 } 780 781 /* Read chunklist (a linked list of N elements, 782 * position P (same P for all chunks of same arg!): 783 * 1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0 784 */ 785 786 cl = xdrrdma_clist(xdrs); 787 788 /* 789 * Update the chunk size information for the Long RPC msg. 
790 */ 791 if (cl && op == RDMA_NOMSG) 792 cl->c_len = p->cku_outsz; 793 794 /* 795 * Prepare the header for the RDMA chunk 796 */ 797 status = clnt_compose_rdma_header(conn, h, &clmsg, &xdrs, &op); 798 if (status != CLNT_RDMA_SUCCESS) { 799 p->cku_err.re_status = RPC_CANTSEND; 800 p->cku_err.re_errno = EIO; 801 rdma_buf_free(conn, &rpcmsg); 802 clist_free(cle); 803 RCSTAT_INCR(rcnomem); 804 cmn_err(CE_WARN, "clnt_rdma_kcallit: no free buffers!!"); 805 goto done; 806 } 807 808 status = clnt_setup_rlist(conn, xdrs, &cl); 809 if (status != CLNT_RDMA_SUCCESS) { 810 cmn_err(CE_WARN, "clnt_rdma_kcallit: clist register failed"); 811 rdma_buf_free(conn, &clmsg); 812 rdma_buf_free(conn, &rpcmsg); 813 clist_free(cl); 814 p->cku_err.re_status = RPC_CANTSEND; 815 p->cku_err.re_errno = EIO; 816 goto done; 817 } 818 819 /* Setup write chunk list for NFS3 READ operation 820 * Other operations will have a NULL wlist 821 */ 822 status = clnt_setup_wlist(conn, procnum, &rpccall_wlist, 823 resultsp, xdr_results, xdrs); 824 if (status != CLNT_RDMA_SUCCESS) { 825 rdma_buf_free(conn, &clmsg); 826 rdma_buf_free(conn, &rpcmsg); 827 clist_free(cl); 828 p->cku_err.re_status = RPC_CANTSEND; 829 p->cku_err.re_errno = EIO; 830 goto done; 831 } 832 833 status = clnt_setup_long_reply(conn, procnum, &long_reply_buf_clist, 834 xdrs, &long_reply_buf_exists); 835 if (status != CLNT_RDMA_SUCCESS) { 836 rdma_buf_free(conn, &clmsg); 837 rdma_buf_free(conn, &rpcmsg); 838 clist_free(cl); 839 p->cku_err.re_status = RPC_CANTSEND; 840 p->cku_err.re_errno = EIO; 841 goto done; 842 } 843 844 /* 845 * XDR encode the RDMA_REPLY write chunk 846 */ 847 seg_array_len = (long_reply_buf_exists ? 
1:0); 848 (void) xdr_encode_reply_wchunk(xdrs, &long_reply_buf_clist, seg_array_len); 849 /* 850 * Start with the RDMA header and clist (if any) 851 */ 852 sendlist = NULL; 853 clist_add(&sendlist, 0, XDR_GETPOS(xdrs), &clmsg.handle, 854 clmsg.addr, NULL, NULL); 855 /* 856 * Put the RPC call message in the send list if small RPC 857 */ 858 if (op == RDMA_MSG) { 859 clist_add(&sendlist, 0, p->cku_outsz, &rpcmsg.handle, 860 rpcmsg.addr, NULL, NULL); 861 } else { 862 /* Long RPC already in chunk list */ 863 RCSTAT_INCR(rclongrpcs); 864 } 865 866 /* 867 * Set up a reply buffer ready for the reply 868 */ 869 status = rdma_clnt_postrecv(conn, p->cku_xid); 870 if (status != RDMA_SUCCESS) { 871 rdma_buf_free(conn, &clmsg); 872 rdma_buf_free(conn, &rpcmsg); 873 if (cl) { 874 (void) clist_deregister(conn, cl, 1);
882 /* 883 * sync the memory for dma 884 */ 885 if (cl != NULL) { 886 status = clist_syncmem(conn, cl, 1); 887 if (status != RDMA_SUCCESS) { 888 rdma_buf_free(conn, &clmsg); 889 rdma_buf_free(conn, &rpcmsg); 890 (void) clist_deregister(conn, cl, 1); 891 clist_free(cl); 892 clist_free(sendlist); 893 p->cku_err.re_status = RPC_CANTSEND; 894 p->cku_err.re_errno = EIO; 895 goto done; 896 } 897 } 898 899 /* 900 * Send the call message to the server 901 */ 902 #if defined (CLNT_INTERRUPT_COAL) 903 status = RDMA_SEND_BL(conn, sendlist, p->cku_xid); 904 #else 905 status = RDMA_SEND(conn, sendlist, p->cku_xid); 906 #endif 907 if (status != RDMA_SUCCESS) { 908 if (cl) { 909 (void) clist_deregister(conn, cl, 1); 910 clist_free(cl); 911 /* 912 * If this was a long RPC message, need 913 * to free that buffer. 914 */ 915 if (rpcmsg.type == CHUNK_BUFFER) 916 rdma_buf_free(conn, &rpcmsg); 917 } 918 clist_free(sendlist); 919 p->cku_err.re_status = RPC_CANTSEND; 920 p->cku_err.re_errno = EIO; 921 goto done; 922 } else { 923 /* 924 * RDMA plugin now owns the send msg buffers. 925 * Clear them out and don't free them here. 926 */ 927 clmsg.addr = NULL; 928 if (rpcmsg.type == SEND_BUFFER) 929 rpcmsg.addr = NULL; 930 } 931 clist_free(sendlist); 932 933 /* 934 * Recv rpc reply 935 */ 936 status = RDMA_RECV(conn, &recvlist, p->cku_xid); 937 clnt_return_credit(conn); 938 939 /* 940 * Deregister chunks sent. Do this only after the reply 941 * is received as that is a sure indication that the 942 * remote end has completed RDMA of the chunks. 
943 */ 944 if (cl != NULL) { 945 /* 946 * Deregister the chunks 947 */ 948 (void) clist_deregister(conn, cl, 1); 949 clist_free(cl); 950 /* 951 * If long RPC free chunk 952 */ 953 rdma_buf_free(conn, &rpcmsg); 954 } 955 956 /* 957 * Now check recv status 958 */ 959 if (status != 0) { 960 if (status == RDMA_INTR) { 961 p->cku_err.re_status = RPC_INTR; 962 p->cku_err.re_errno = EINTR; 963 RCSTAT_INCR(rcintrs); 964 } else if (status == RPC_TIMEDOUT) { 965 p->cku_err.re_status = RPC_TIMEDOUT; 966 p->cku_err.re_errno = ETIMEDOUT; 967 RCSTAT_INCR(rctimeouts); 968 } else { 969 p->cku_err.re_status = RPC_CANTRECV; 970 p->cku_err.re_errno = EIO; 971 } 972 goto done; 973 } 974 /* 975 * Process the reply message. 976 * 977 * First the chunk list (if any) 978 */ 979 xdrs = &(p->cku_inxdr); 980 xdrmem_create(xdrs, (caddr_t)(uintptr_t)recvlist->c_saddr, 981 recvlist->c_len, XDR_DECODE); 982 /* 983 * Treat xid as opaque (xid is the first entity 984 * in the rpc rdma message). 985 */ 986 xid = *(uint32_t *)(uintptr_t)recvlist->c_saddr; 987 /* Skip xid and set the xdr position accordingly. */ 988 XDR_SETPOS(xdrs, sizeof (uint32_t)); 989 (void) xdr_u_int(xdrs, &vers); 990 (void) xdr_u_int(xdrs, &rdma_credit); 991 (void) xdr_u_int(xdrs, &op); 992 (void) xdr_do_clist(xdrs, &cl); 993 clnt_update_credit(conn, rdma_credit); 994 wlist_exists_reply = FALSE; 995 if (! xdr_decode_wlist(xdrs, &rpcreply_wlist, &wlist_exists_reply)) { 996 cmn_err(CE_NOTE, 997 "clnt_rdma_kcallit: xdr_decode_wlist failed"); 998 /* XXX: what should we fail with here -- EIO? 
*/ 999 } 1000 #ifdef RPC_RDMA_INLINE 1001 if (xdr_results == x_READ3vres) { 1002 ((READ3vres *)resultsp)->wlist = NULL; 1003 } else if (xdr_results == x_READ3uiores) { 1004 ((READ3uiores *)resultsp)->wlist = NULL; 1005 } 1006 #endif 1007 1008 if (procnum == NFSPROC3_READ) { 1009 1010 check_dereg_wlist(conn, rpccall_wlist); 1011 1012 if (wlist_exists_reply) { 1013 if (xdr_results == x_READ3vres) { 1014 ((READ3vres *)resultsp)->wlist = 1015 rpcreply_wlist; 1016 ((READ3vres *)resultsp)->wlist_len = 1017 rpcreply_wlist->c_len; 1018 } else if (xdr_results == x_READ3uiores) { 1019 ((READ3uiores *)resultsp)->wlist = 1020 rpcreply_wlist; 1021 ((READ3uiores *)resultsp)->wlist_len = 1022 rpcreply_wlist->c_len; 1023 } else { 1024 cmn_err(CE_NOTE, 1025 "unknown READ3 xdr decode fnp=%p", 1026 (void *)xdr_results); 1027 } 1028 } 1029 } else { 1030 if(wlist_exists_reply) 1031 cmn_err(CE_NOTE, 1032 "clnt_rdma_kcallit: received wlist for " 1033 "non-READ3 call. reply xdr decode fnp=%p", 1034 (void *)xdr_results); 1035 } 1036 1037 /* 1038 * The server shouldn't have sent a RDMA_SEND that 1039 * the client needs to RDMA_WRITE a reply back to 1040 * the server. So silently ignoring what the 1041 * server returns in the rdma_reply section of the 1042 * header. 1043 */ 1044 (void) xdr_decode_reply_wchunk(xdrs, &rdma_reply,conn); 1045 off = xdr_getpos(xdrs); 1046 1047 xdrs = &replxdr; 1048 if (clnt_decode_long_reply(conn, procnum, &long_reply_buf_clist, 1049 rdma_reply, xdrs, &rxdrp, 1050 cl, recvlist, op, off) != CLNT_RDMA_SUCCESS) 1051 { 1052 goto done; 1053 } 1054 reply_msg.rm_direction = REPLY; 1055 reply_msg.rm_reply.rp_stat = MSG_ACCEPTED; 1056 reply_msg.acpted_rply.ar_stat = SUCCESS; 1057 reply_msg.acpted_rply.ar_verf = _null_auth; 1058 /* 1059 * xdr_results will be done in AUTH_UNWRAP. 1060 */ 1061 reply_msg.acpted_rply.ar_results.where = NULL; 1062 reply_msg.acpted_rply.ar_results.proc = xdr_void; 1063 1064 /* 1065 * Decode and validate the response. 
1066 */ 1067 if (xdr_replymsg(xdrs, &reply_msg)) { 1068 enum clnt_stat re_status; 1069 1070 _seterr_reply(&reply_msg, &(p->cku_err)); 1071 1072 re_status = p->cku_err.re_status; 1073 if (re_status == RPC_SUCCESS) {
1118 break; 1119 } 1120 RPCLOG(1, "clnt_rdma_kcallit : " 1121 "authentication failed with " 1122 "RPC_AUTHERROR of type %d\n", 1123 p->cku_err.re_why); 1124 } 1125 cmn_err(CE_WARN, 1126 "clnt_rdma_kcallit: RPC failed"); 1127 1128 } 1129 } else { 1130 p->cku_err.re_status = RPC_CANTDECODERES; 1131 p->cku_err.re_errno = EIO; 1132 cmn_err(CE_WARN, "clnt_rdma_kcallit: xdr_replymsg failed"); 1133 } 1134 1135 /* 1136 * If rpc reply is in a chunk, free it now. 1137 */ 1138 done: 1139 if (long_reply_buf_exists){ 1140 (void) clist_deregister(conn, &long_reply_buf_clist, 0); 1141 #ifndef SERVER_REG_CACHE 1142 kmem_free((void *)long_reply_buf_clist.c_daddr, 1143 (size_t)long_reply_buf_clist.c_len); 1144 #else 1145 RDMA_FREE_SERVER_CACHE_BUF(conn, (rib_lrc_entry_t *)long_reply_buf_clist.long_reply_buf); 1146 #endif 1147 } 1148 if (cxdrp) 1149 XDR_DESTROY(cxdrp); 1150 if (rxdrp) { 1151 (void) xdr_rpc_free_verifier(rxdrp, &reply_msg); 1152 XDR_DESTROY(rxdrp); 1153 } 1154 1155 if (recvlist) { 1156 rdma_buf_t recvmsg = {0}; 1157 recvmsg.addr = (caddr_t)(uintptr_t)recvlist->c_saddr; 1158 recvmsg.type = RECV_BUFFER; 1159 RDMA_BUF_FREE(conn, &recvmsg); 1160 clist_free(recvlist); 1161 } 1162 #if (!defined(ASYNC_CLIENT_DEREG)) 1163 if(rpccall_wlist){ 1164 kmem_free(rpccall_wlist, sizeof(clist)); 1165 } 1166 #endif 1167 1168 RDMA_REL_CONN(conn); 1169 if (p->cku_err.re_status != RPC_SUCCESS) { 1170 RCSTAT_INCR(rcbadcalls); 1171 } 1172 return (p->cku_err.re_status); 1173 } 1174 1175 static int clnt_decode_long_reply(CONN *conn, rpcproc_t procnum, 1176 struct clist *long_reply_buf_clist, 1177 struct clist *rdma_reply, XDR *xdrs, 1178 XDR **rxdrp, struct clist *cl, 1179 struct clist *recvlist, 1180 uint_t op,uint_t off) 1181 { 1182 if ( RDMA_NOMSG == op && long_reply_buf_clist->c_daddr) { 1183 if (procnum == NFSPROC3_READDIR || 1184 procnum == NFSPROC3_READDIRPLUS || 1185 procnum == NFSPROC3_READLINK) { 1186 xdrmem_destroy(xdrs); 1187 xdrrdma_create(xdrs, 1188 
(caddr_t)long_reply_buf_clist->c_daddr, 1189 rdma_reply->c_len, 1190 0, 1191 NULL, 1192 XDR_DECODE, 1193 conn); 1194 1195 *rxdrp = xdrs; 1196 } else { 1197 cmn_err(CE_NOTE, "clnt_rdma_kcallit: " 1198 "wchunk buffer for wrong nfs proc"); 1199 xdrmem_destroy(xdrs); 1200 *rxdrp = NULL; 1201 } 1202 } else if (cl && RDMA_NOMSG == op) { 1203 cmn_err(CE_NOTE, "clnt_rdma_kcallit: " 1204 "Server sent a READ list in the RPC Reply"); 1205 xdrmem_destroy(xdrs); 1206 } else { 1207 xdrmem_destroy(xdrs); 1208 xdrrdma_create(xdrs, 1209 (caddr_t)(uintptr_t)(recvlist->c_saddr + off), 1210 recvlist->c_len - off, 0, cl, XDR_DECODE, conn); 1211 *rxdrp = xdrs; 1212 } 1213 return CLNT_RDMA_SUCCESS; 1214 } 1215 1216 #ifdef DYNAMIC_CREDIT_CONTROL 1217 static void clnt_compute_credit(CONN *conn, uint32_t *rdma_credit) 1218 { 1219 rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc; 1220 1221 mutex_enter(&conn->c_lock); 1222 if(cc_info->clnt_cc_granted_ops - cc_info->clnt_cc_in_flight_ops < CLNT_CREDIT_LOW) 1223 *rdma_credit = rdma_bufs_rqst + cc_info->clnt_cc_in_flight_ops / 2; 1224 mutex_exit(&conn->c_lock); 1225 } 1226 #endif 1227 1228 static void clnt_return_credit(CONN *conn) 1229 { 1230 rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc; 1231 1232 mutex_enter(&conn->c_lock); 1233 cc_info->clnt_cc_in_flight_ops--; 1234 cv_signal(&cc_info->clnt_cc_cv); 1235 mutex_exit(&conn->c_lock); 1236 } 1237 1238 static void clnt_update_credit(CONN *conn, uint32_t rdma_credit) 1239 { 1240 rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc; 1241 1242 /* 1243 * Get the granted number of buffers for credit control. 
1244 */ 1245 mutex_enter(&conn->c_lock); 1246 cc_info->clnt_cc_granted_ops = rdma_credit; 1247 mutex_exit(&conn->c_lock); 1248 } 1249 1250 static void clnt_check_credit(CONN *conn) 1251 { 1252 rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc; 1253 1254 /* 1255 * Make sure we are not going over our allowed buffer use 1256 * (and make sure we have gotten a granted value before). 1257 */ 1258 mutex_enter(&conn->c_lock); 1259 while (cc_info->clnt_cc_in_flight_ops >= cc_info->clnt_cc_granted_ops 1260 && cc_info->clnt_cc_granted_ops != 0) { 1261 /* 1262 * Client has maxed out its granted buffers due to 1263 * credit control. Current handling is to block and wait. 1264 */ 1265 cv_wait(&cc_info->clnt_cc_cv, &conn->c_lock); 1266 } 1267 cc_info->clnt_cc_in_flight_ops++; 1268 mutex_exit(&conn->c_lock); 1269 } 1270 1271 /* ARGSUSED */ 1272 static void 1273 clnt_rdma_kabort(CLIENT *h) 1274 { 1275 } 1276 1277 static void 1278 clnt_rdma_kerror(CLIENT *h, struct rpc_err *err) 1279 { 1280 struct cku_private *p = htop(h); 1281 1282 *err = p->cku_err; 1283 } 1284 1285 static bool_t 1286 clnt_rdma_kfreeres(CLIENT *h, xdrproc_t xdr_res, caddr_t res_ptr) 1287 { 1288 struct cku_private *p = htop(h); 1289 XDR *xdrs; 1290
1303 /* ARGSUSED */ 1304 static int 1305 clnt_rdma_ksettimers(CLIENT *h, struct rpc_timers *t, struct rpc_timers *all, 1306 int minimum, void(*feedback)(int, int, caddr_t), caddr_t arg, 1307 uint32_t xid) 1308 { 1309 RCSTAT_INCR(rctimers); 1310 return (0); 1311 } 1312 1313 int 1314 rdma_reachable(int addr_type, struct netbuf *addr, struct knetconfig **knconf) 1315 { 1316 rdma_registry_t *rp; 1317 void *handle = NULL; 1318 struct knetconfig *knc; 1319 char *pf, *p; 1320 rdma_stat status; 1321 int error = 0; 1322 1323 mutex_enter(&rdma_modload_lock); 1324 error = rdma_modload(); 1325 mutex_exit(&rdma_modload_lock); 1326 1327 if (!INGLOBALZONE(curproc)) 1328 return (-1); 1329 /* 1330 * modload the RDMA plugins if not already done. 1331 */ 1332 if (!rdma_modloaded) { 1333 mutex_enter(&rdma_modload_lock); 1334 if (!rdma_modloaded) { 1335 error = rdma_modload(); 1336 } 1337 mutex_exit(&rdma_modload_lock); 1338 if (error) 1339 return (-1); 1340 } 1341 1342 if (!rdma_dev_available) 1343 return (-1); 1344 1345 rw_enter(&rdma_lock, RW_READER); 1346 rp = rdma_mod_head;
1356 if (addr_type == AF_INET) 1357 (void) strncpy(pf, NC_INET, KNC_STRSIZE); 1358 else if (addr_type == AF_INET6) 1359 (void) strncpy(pf, NC_INET6, KNC_STRSIZE); 1360 pf[KNC_STRSIZE - 1] = '\0'; 1361 1362 (void) strncpy(p, rp->r_mod->rdma_api, KNC_STRSIZE); 1363 p[KNC_STRSIZE - 1] = '\0'; 1364 1365 knc->knc_protofmly = pf; 1366 knc->knc_proto = p; 1367 knc->knc_rdev = (dev_t)handle; 1368 *knconf = knc; 1369 rw_exit(&rdma_lock); 1370 return (0); 1371 } 1372 rp = rp->r_next; 1373 } 1374 rw_exit(&rdma_lock); 1375 return (-1); 1376 } 1377 1378 static void 1379 check_dereg_wlist(CONN *conn, clist *rwc) 1380 { 1381 if (rwc == NULL) 1382 return; 1383 1384 if (rwc) { 1385 if (rwc->c_dmemhandle.mrc_rmr && rwc->c_len) { 1386 int status; 1387 #if defined(ASYNC_CLIENT_DEREG) 1388 /* Add in an entry to rqueue */ 1389 INSERT_QUEUE(conn, rwc); 1390 #else 1391 status = clist_deregister(conn, rwc, FALSE); 1392 if (status != RDMA_SUCCESS) { 1393 cmn_err(CE_NOTE, "dereg_wlist failed." 1394 "status=%d", status); 1395 } 1396 #endif 1397 } 1398 1399 } 1400 }