1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 /* 29 * Portions of this source code were derived from Berkeley 30 * 4.3 BSD under license from the Regents of the University of 31 * California. 32 */ 33 34 /* Copyright (c) 2006, The Ohio State University. All rights reserved. 35 * 36 * Portions of this source code is developed by the team members of 37 * The Ohio State University's Network-Based Computing Laboratory (NBCL), 38 * headed by Professor Dhabaleswar K. (DK) Panda. 
39 * 40 * Acknowledgements to contributions from developors: 41 * Ranjit Noronha: noronha@cse.ohio-state.edu 42 * Lei Chai : chail@cse.ohio-state.edu 43 * Weikuan Yu : yuw@cse.ohio-state.edu 44 * 45 */ 46 47 #pragma ident "@(#)clnt_rdma.c 1.10 05/07/26 SMI" 48 49 #include <sys/param.h> 50 #include <sys/types.h> 51 #include <sys/user.h> 52 #include <sys/systm.h> 53 #include <sys/sysmacros.h> 54 #include <sys/errno.h> 55 #include <sys/kmem.h> 56 #include <sys/debug.h> 57 #include <sys/systm.h> 58 #include <sys/kstat.h> 59 #include <sys/t_lock.h> 60 #include <sys/ddi.h> 61 #include <sys/cmn_err.h> 62 #include <sys/time.h> 63 #include <sys/isa_defs.h> 64 #include <sys/zone.h> 65 66 #include <rpc/types.h> 67 #include <rpc/xdr.h> 68 #include <rpc/auth.h> 69 #include <rpc/clnt.h> 70 #include <rpc/rpc_msg.h> 71 #include <rpc/rpc_rdma.h> 72 #include <nfs/nfs.h> 73 74 #define CLNT_CREDIT_LOW (5) 75 76 xdrproc_t x_READ3args = NULL_xdrproc_t; 77 xdrproc_t x_READ3res = NULL_xdrproc_t; 78 xdrproc_t x_READ3vres = NULL_xdrproc_t; 79 xdrproc_t x_READ3uiores = NULL_xdrproc_t; 80 81 static uint32_t rdma_bufs_rqst = RDMA_BUFS_RQST; 82 83 int rdma_wlist_verbose_debug = 0; 84 int rdma_wlist_memreg_debug = 0; 85 int rdma_wlist_clnt_debug = 0; 86 int rdma_wlist_svc_debug = 0; 87 int rdma_wlist_xdr_debug = 0; 88 int rdma_wlist_pglck_debug = 0; 89 int credit_control_debug = 0; 90 int rdma_long_reply_debug = 0; 91 int rdma_xdr_long_reply_debug = 0; 92 93 struct clist empty_cl = {0}; 94 95 static void clnt_read3args_make_wlist(caddr_t, struct clist **, xdrproc_t, uint_t *); 96 static int clnt_compose_rpcmsg(CLIENT *, rpcproc_t, rdma_buf_t *, 97 XDR *, xdrproc_t, caddr_t); 98 static int clnt_compose_rdma_header(CONN *, CLIENT *, rdma_buf_t *, 99 XDR **, uint_t *); 100 static int clnt_setup_rlist(CONN *, XDR *, struct clist **); 101 static int clnt_setup_wlist(CONN *, rpcproc_t, struct clist **, 102 caddr_t, xdrproc_t, XDR *); 103 static int clnt_setup_long_reply(CONN *, rpcproc_t, struct clist 
*, 104 XDR *, bool_t *); 105 #ifdef DYNAMIC_CREDIT_CONTROL 106 static void clnt_compute_credit(CONN *, uint32_t *); 107 #endif 108 static void clnt_check_credit(CONN *); 109 static void clnt_return_credit(CONN *); 110 static int clnt_decode_long_reply(CONN *, rpcproc_t, struct clist *, 111 struct clist *, XDR *, XDR **, struct clist *, 112 struct clist *, uint_t,uint_t); 113 114 static void clnt_update_credit(CONN *,uint32_t); 115 static void check_dereg_wlist(CONN *, struct clist *); 116 117 static enum clnt_stat clnt_rdma_kcallit(CLIENT *, rpcproc_t, xdrproc_t, 118 caddr_t, xdrproc_t, caddr_t, struct timeval); 119 static void clnt_rdma_kabort(CLIENT *); 120 static void clnt_rdma_kerror(CLIENT *, struct rpc_err *); 121 static bool_t clnt_rdma_kfreeres(CLIENT *, xdrproc_t, caddr_t); 122 static void clnt_rdma_kdestroy(CLIENT *); 123 static bool_t clnt_rdma_kcontrol(CLIENT *, int, char *); 124 static int clnt_rdma_ksettimers(CLIENT *, struct rpc_timers *, 125 struct rpc_timers *, int, void(*)(int, int, caddr_t), caddr_t, uint32_t); 126 127 /* 128 * Operations vector for RDMA based RPC 129 */ 130 static struct clnt_ops rdma_clnt_ops = { 131 clnt_rdma_kcallit, /* do rpc call */ 132 clnt_rdma_kabort, /* abort call */ 133 clnt_rdma_kerror, /* return error status */ 134 clnt_rdma_kfreeres, /* free results */ 135 clnt_rdma_kdestroy, /* destroy rpc handle */ 136 clnt_rdma_kcontrol, /* the ioctl() of rpc */ 137 clnt_rdma_ksettimers, /* set retry timers */ 138 }; 139 140 /* 141 * The size of the preserialized RPC header information. 
142 */ 143 #define CKU_HDRSIZE 20 144 #define CLNT_RDMA_SUCCESS 0 145 #define CLNT_RDMA_FAIL -99 146 147 /* 148 * Per RPC RDMA endpoint details 149 */ 150 typedef struct cku_private { 151 CLIENT cku_client; /* client handle */ 152 rdma_mod_t *cku_rd_mod; /* underlying RDMA mod */ 153 void *cku_rd_handle; /* underlying RDMA device */ 154 struct netbuf cku_addr; /* remote netbuf address */ 155 int cku_addrfmly; /* for finding addr_type */ 156 struct rpc_err cku_err; /* error status */ 157 struct cred *cku_cred; /* credentials */ 158 XDR cku_outxdr; /* xdr stream for output */ 159 uint32_t cku_outsz; 160 XDR cku_inxdr; /* xdr stream for input */ 161 char cku_rpchdr[CKU_HDRSIZE+4]; /* rpc header */ 162 uint32_t cku_xid; /* current XID */ 163 } cku_private_t; 164 165 #define CLNT_RDMA_DELAY 10 /* secs to delay after a connection failure */ 166 static int clnt_rdma_min_delay = CLNT_RDMA_DELAY; 167 168 struct { 169 kstat_named_t rccalls; 170 kstat_named_t rcbadcalls; 171 kstat_named_t rcbadxids; 172 kstat_named_t rctimeouts; 173 kstat_named_t rcnewcreds; 174 kstat_named_t rcbadverfs; 175 kstat_named_t rctimers; 176 kstat_named_t rccantconn; 177 kstat_named_t rcnomem; 178 kstat_named_t rcintrs; 179 kstat_named_t rclongrpcs; 180 } rdmarcstat = { 181 { "calls", KSTAT_DATA_UINT64 }, 182 { "badcalls", KSTAT_DATA_UINT64 }, 183 { "badxids", KSTAT_DATA_UINT64 }, 184 { "timeouts", KSTAT_DATA_UINT64 }, 185 { "newcreds", KSTAT_DATA_UINT64 }, 186 { "badverfs", KSTAT_DATA_UINT64 }, 187 { "timers", KSTAT_DATA_UINT64 }, 188 { "cantconn", KSTAT_DATA_UINT64 }, 189 { "nomem", KSTAT_DATA_UINT64 }, 190 { "interrupts", KSTAT_DATA_UINT64 }, 191 { "longrpc", KSTAT_DATA_UINT64 } 192 }; 193 194 kstat_named_t *rdmarcstat_ptr = (kstat_named_t *)&rdmarcstat; 195 uint_t rdmarcstat_ndata = sizeof (rdmarcstat) / sizeof (kstat_named_t); 196 197 #ifdef DEBUG 198 int rdma_clnt_debug = 0; 199 #endif 200 201 #ifdef accurate_stats 202 extern kmutex_t rdmarcstat_lock; /* mutex for rcstat updates */ 203 204 
#define RCSTAT_INCR(x) \ 205 mutex_enter(&rdmarcstat_lock); \ 206 rdmarcstat.x.value.ui64++; \ 207 mutex_exit(&rdmarcstat_lock); 208 #else 209 #define RCSTAT_INCR(x) \ 210 rdmarcstat.x.value.ui64++; 211 #endif 212 213 #define ptoh(p) (&((p)->cku_client)) 214 #define htop(h) ((cku_private_t *)((h)->cl_private)) 215 216 int 217 clnt_rdma_kcreate(char *proto, void *handle, struct netbuf *raddr, int family, 218 rpcprog_t pgm, rpcvers_t vers, struct cred *cred, CLIENT **cl) 219 { 220 CLIENT *h; 221 struct cku_private *p; 222 struct rpc_msg call_msg; 223 rdma_registry_t *rp; 224 225 ASSERT(INGLOBALZONE(curproc)); 226 227 if (cl == NULL) 228 return (EINVAL); 229 *cl = NULL; 230 231 p = kmem_zalloc(sizeof (*p), KM_SLEEP); 232 233 /* 234 * Find underlying RDMATF plugin 235 */ 236 rw_enter(&rdma_lock, RW_READER); 237 rp = rdma_mod_head; 238 while (rp != NULL) { 239 if (strcmp(rp->r_mod->rdma_api, proto)) 240 rp = rp->r_next; 241 else { 242 p->cku_rd_mod = rp->r_mod; 243 p->cku_rd_handle = handle; 244 break; 245 } 246 } 247 rw_exit(&rdma_lock); 248 249 if (p->cku_rd_mod == NULL) { 250 /* 251 * Should not happen. 252 * No matching RDMATF plugin. 
253 */ 254 kmem_free(p, sizeof (struct cku_private)); 255 return (EINVAL); 256 } 257 258 h = ptoh(p); 259 h->cl_ops = &rdma_clnt_ops; 260 h->cl_private = (caddr_t)p; 261 h->cl_auth = authkern_create(); 262 263 /* call message, just used to pre-serialize below */ 264 call_msg.rm_xid = 0; 265 call_msg.rm_direction = CALL; 266 call_msg.rm_call.cb_rpcvers = RPC_MSG_VERSION; 267 call_msg.rm_call.cb_prog = pgm; 268 call_msg.rm_call.cb_vers = vers; 269 270 xdrmem_create(&p->cku_outxdr, p->cku_rpchdr, CKU_HDRSIZE, XDR_ENCODE); 271 /* pre-serialize call message header */ 272 if (!xdr_callhdr(&p->cku_outxdr, &call_msg)) { 273 XDR_DESTROY(&p->cku_outxdr); 274 auth_destroy(h->cl_auth); 275 kmem_free(p, sizeof (struct cku_private)); 276 return (EINVAL); 277 } 278 279 /* 280 * Set up the rpc information 281 */ 282 p->cku_cred = cred; 283 p->cku_addr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP); 284 p->cku_addr.maxlen = raddr->maxlen; 285 p->cku_addr.len = raddr->len; 286 bcopy(raddr->buf, p->cku_addr.buf, raddr->len); 287 p->cku_addrfmly = family; 288 289 *cl = h; 290 return (0); 291 } 292 293 static void 294 clnt_rdma_kdestroy(CLIENT *h) 295 { 296 struct cku_private *p = htop(h); 297 298 kmem_free(p->cku_addr.buf, p->cku_addr.maxlen); 299 kmem_free(p, sizeof (*p)); 300 } 301 302 void 303 clnt_rdma_kinit(CLIENT *h, char *proto, void *handle, struct netbuf *raddr, 304 struct cred *cred) 305 { 306 struct cku_private *p = htop(h); 307 rdma_registry_t *rp; 308 309 ASSERT(INGLOBALZONE(curproc)); 310 /* 311 * Find underlying RDMATF plugin 312 */ 313 p->cku_rd_mod = NULL; 314 rw_enter(&rdma_lock, RW_READER); 315 rp = rdma_mod_head; 316 while (rp != NULL) { 317 if (strcmp(rp->r_mod->rdma_api, proto)) 318 rp = rp->r_next; 319 else { 320 p->cku_rd_mod = rp->r_mod; 321 p->cku_rd_handle = handle; 322 break; 323 } 324 325 } 326 rw_exit(&rdma_lock); 327 328 /* 329 * Set up the rpc information 330 */ 331 p->cku_cred = cred; 332 p->cku_xid = 0; 333 334 if (p->cku_addr.maxlen < raddr->len) { 335 
if (p->cku_addr.maxlen != 0 && p->cku_addr.buf != NULL) 336 kmem_free(p->cku_addr.buf, p->cku_addr.maxlen); 337 p->cku_addr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP); 338 p->cku_addr.maxlen = raddr->maxlen; 339 } 340 341 p->cku_addr.len = raddr->len; 342 bcopy(raddr->buf, p->cku_addr.buf, raddr->len); 343 h->cl_ops = &rdma_clnt_ops; 344 } 345 346 static int clnt_compose_rpcmsg(CLIENT *h, rpcproc_t procnum, 347 rdma_buf_t *rpcmsg, XDR *xdrs, 348 xdrproc_t xdr_args, caddr_t argsp) 349 { 350 cku_private_t *p = htop(h); 351 352 if (h->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) { 353 /* 354 * Copy in the preserialized RPC header 355 * information. 356 */ 357 bcopy(p->cku_rpchdr, rpcmsg->addr, CKU_HDRSIZE); 358 359 /* 360 * transaction id is the 1st thing in the output 361 * buffer. 362 */ 363 /* LINTED pointer alignment */ 364 (*(uint32_t *)(rpcmsg->addr)) = p->cku_xid; 365 366 /* Skip the preserialized stuff. */ 367 XDR_SETPOS(xdrs, CKU_HDRSIZE); 368 369 /* Serialize dynamic stuff into the output buffer. */ 370 if ((!XDR_PUTINT32(xdrs, (int32_t *)&procnum)) || 371 (!AUTH_MARSHALL(h->cl_auth, xdrs, p->cku_cred)) || 372 (!(*xdr_args)(xdrs, argsp))) { 373 cmn_err(CE_WARN,"Failed to serialize dynamic arguments\n"); 374 return CLNT_RDMA_FAIL; 375 } 376 p->cku_outsz = XDR_GETPOS(xdrs); 377 } else { 378 uint32_t *uproc = (uint32_t *)&p->cku_rpchdr[CKU_HDRSIZE]; 379 IXDR_PUT_U_INT32(uproc, procnum); 380 (*(uint32_t *)(&p->cku_rpchdr[0])) = p->cku_xid; 381 XDR_SETPOS(xdrs, 0); 382 383 /* Serialize the procedure number and the arguments. 
*/ 384 if (!AUTH_WRAP(h->cl_auth, (caddr_t)p->cku_rpchdr, 385 CKU_HDRSIZE+4, xdrs, NULL, NULL) || 386 !(*xdr_args)(xdrs, argsp)) { 387 if (rpcmsg->addr != xdrs->x_base) { 388 rpcmsg->addr = xdrs->x_base; 389 rpcmsg->len = xdr_getbufsize(xdrs); 390 } 391 cmn_err(CE_WARN,"Failed to serialize procedure number and the arguments.\n"); 392 return CLNT_RDMA_FAIL; 393 } 394 /* 395 * If we had to allocate a new buffer while encoding 396 * then update the addr and len. 397 */ 398 if (rpcmsg->addr != xdrs->x_base) { 399 rpcmsg->addr = xdrs->x_base; 400 rpcmsg->len = xdr_getbufsize(xdrs); 401 } 402 403 p->cku_outsz = XDR_GETPOS(xdrs); 404 } 405 406 return CLNT_RDMA_SUCCESS; 407 } 408 409 static int clnt_compose_rdma_header(CONN *conn, CLIENT *h, rdma_buf_t *clmsg, 410 XDR **xdrs, uint_t *op) 411 { 412 cku_private_t *p = htop(h); 413 uint_t vers; 414 uint32_t rdma_credit = rdma_bufs_rqst; 415 416 vers = RPCRDMA_VERS; 417 clmsg->type = SEND_BUFFER; 418 419 #ifdef DYNAMIC_CREDIT_CONTROL 420 clnt_compute_credit(conn, &rdma_credit); 421 #endif 422 423 if (RDMA_BUF_ALLOC(conn, clmsg)) { 424 return CLNT_RDMA_FAIL; 425 } 426 427 *xdrs = &p->cku_outxdr; 428 xdrmem_create(*xdrs, clmsg->addr, clmsg->len, XDR_ENCODE); 429 430 (*(uint32_t *)clmsg->addr) = p->cku_xid; 431 XDR_SETPOS(*xdrs, sizeof (uint32_t)); 432 (void) xdr_u_int(*xdrs, &vers); 433 (void) xdr_u_int(*xdrs, &rdma_credit); 434 (void) xdr_u_int(*xdrs, op); 435 436 return CLNT_RDMA_SUCCESS; 437 } 438 439 static int clnt_setup_rlist(CONN *conn, XDR *xdrs, struct clist **cl) 440 { 441 int ret; 442 443 if (*cl != NULL) { 444 ret = clist_register(conn, *cl, 1); 445 if (ret != RDMA_SUCCESS) { 446 return CLNT_RDMA_FAIL; 447 } 448 } 449 (void) xdr_do_clist(xdrs, cl); 450 451 return CLNT_RDMA_SUCCESS; 452 } 453 454 static int clnt_setup_wlist(CONN *conn, rpcproc_t procnum, 455 struct clist **rpccall_wlist, caddr_t resultsp, 456 xdrproc_t xdr_results, XDR *xdrs) 457 { 458 int status; 459 uint_t num_segment = 0; 460 461 if (procnum == 
NFSPROC3_READ) { 462 clnt_read3args_make_wlist(resultsp, rpccall_wlist, 463 xdr_results, &num_segment); 464 status = clist_register(conn, *rpccall_wlist, 0); 465 if (status != RDMA_SUCCESS) 466 return CLNT_RDMA_FAIL; 467 } else { 468 *rpccall_wlist = NULL; 469 } 470 471 if (! xdr_encode_wlist(xdrs, *rpccall_wlist, num_segment)) 472 return CLNT_RDMA_FAIL; 473 474 return CLNT_RDMA_SUCCESS; 475 } 476 477 static int clnt_setup_long_reply(CONN *conn, rpcproc_t procnum, 478 struct clist *lrc_clist, 479 XDR *xdrs, bool_t *exists) 480 { 481 int status; 482 caddr_t addr; 483 #ifdef SERVER_REG_CACHE 484 rib_lrc_entry_t *long_reply_buf = NULL; 485 #endif 486 *exists = FALSE; 487 lrc_clist->c_daddr = NULL; 488 489 #ifdef RPC_RDMA_INLINE 490 if (lrc_clist->c_len < rdma_minchunk) 491 return CLNT_RDMA_SUCCESS; 492 #endif 493 494 if (procnum == NFSPROC3_READDIR || 495 procnum == NFSPROC3_READDIRPLUS || 496 procnum == NFSPROC3_READLINK) { 497 #ifndef SERVER_REG_CACHE 498 addr = kmem_alloc(LONG_REPLY_LEN, KM_SLEEP); 499 bzero(addr, LONG_REPLY_LEN); 500 lrc_clist->c_daddr = (uint64)addr; 501 lrc_clist->c_len = LONG_REPLY_LEN; 502 lrc_clist->c_next = NULL; 503 lrc_clist->long_reply_buf = NULL; 504 status = clist_register(conn, lrc_clist, 0); 505 #else 506 long_reply_buf = RDMA_GET_SERVER_CACHE_BUF(conn, LONG_REPLY_LEN); 507 bzero(long_reply_buf->lrc_buf, LONG_REPLY_LEN); 508 lrc_clist->c_daddr = (uint64)long_reply_buf->lrc_buf; 509 lrc_clist->c_len = LONG_REPLY_LEN; 510 lrc_clist->c_next = NULL; 511 lrc_clist->long_reply_buf = (uint64)long_reply_buf; 512 lrc_clist->c_dmemhandle = long_reply_buf->lrc_mhandle; 513 status = clist_register(conn, lrc_clist, 0); 514 #endif 515 if(status) { 516 cmn_err(CE_WARN, "clnt_setup_long_reply: cannot register buffer"); 517 #ifndef SERVER_REG_CACHE 518 kmem_free((void*)addr, (size_t)LONG_REPLY_LEN); 519 #else 520 RDMA_FREE_SERVER_CACHE_BUF(conn, (rib_lrc_entry_t *)long_reply_buf); 521 522 #endif 523 lrc_clist->c_daddr = NULL; 524 return 
CLNT_RDMA_FAIL; 525 } 526 *exists = TRUE; 527 } 528 529 return CLNT_RDMA_SUCCESS; 530 } 531 532 static void 533 clnt_read3args_make_wlist(caddr_t replyp, struct clist **rpccall_wlist, 534 xdrproc_t xr, uint_t *num_segment) 535 { 536 READ3uiores *ures = (READ3uiores *)replyp; 537 READ3vres *vres = (READ3vres *)replyp; 538 struct clist *rwl = NULL, *prev = NULL; 539 int i, total_length; 540 541 *rpccall_wlist = NULL; 542 543 #ifdef RPC_RDMA_INLINE 544 if (xr == x_READ3uiores) { 545 total_length = 0; 546 for(i=0; i<ures->uiop->uio_iovcnt; i++) { 547 total_length += ures->uiop->uio_iov[i].iov_len; 548 } 549 } else { 550 total_length = vres->data.data_len; 551 } 552 553 if (total_length < rdma_minchunk) 554 return; 555 #endif 556 557 /* XXX: fake a chunk threshold for the combined length for now */ 558 if (xr == x_READ3uiores) { 559 *num_segment = ures->uiop->uio_iovcnt; 560 for(i=0; i<ures->uiop->uio_iovcnt; i++) { 561 rwl = (struct clist *)kmem_zalloc(sizeof(struct clist), 562 KM_SLEEP); 563 564 rwl->c_len = ures->uiop->uio_iov[i].iov_len; 565 rwl->c_daddr = (uint64)(ures->uiop->uio_iov[i].iov_base); 566 /* 567 * if userspace address, put adspace ptr in clist. 
568 * If not, then do nothing since it's already 569 * set to NULL (from empty_cl) 570 */ 571 if (ures->uiop->uio_segflg == UIO_USERSPACE) { 572 int error; 573 rwl->c_adspc = ttoproc(curthread)->p_as; 574 } else { 575 rwl->c_dpplist = (page_t **)NULL; 576 } 577 578 if(prev == NULL) 579 prev = rwl; 580 else { 581 prev->c_next = rwl; 582 prev = rwl; 583 } 584 585 if(*rpccall_wlist == NULL) 586 *rpccall_wlist = rwl; 587 } 588 rwl->c_next = NULL; 589 } else if (xr == x_READ3vres) { 590 *num_segment = 1; 591 rwl = (struct clist *)kmem_zalloc(sizeof (struct clist), 592 KM_SLEEP); 593 *rwl = empty_cl; 594 595 rwl->c_len = vres->data.data_len; 596 rwl->c_daddr = (uint64)(vres->data.data_val); 597 598 if(*rpccall_wlist == NULL) 599 *rpccall_wlist = rwl; 600 } else { 601 /*cmn_err(CE_NOTE, "read3args_make_wlist: non READ3xr=%p", 602 (void *)xr);*/ 603 } 604 } 605 606 /* ARGSUSED */ 607 static enum clnt_stat 608 clnt_rdma_kcallit(CLIENT *h, rpcproc_t procnum, xdrproc_t xdr_args, 609 caddr_t argsp, xdrproc_t xdr_results, caddr_t resultsp, struct timeval wait) 610 { 611 cku_private_t *p = htop(h); 612 int status; 613 XDR *xdrs; 614 XDR *cxdrp = NULL, callxdr; /* for xdrrdma encoding the RPC call */ 615 XDR *rxdrp = NULL, replxdr; /* for xdrrdma decoding the RPC reply */ 616 struct rpc_msg reply_msg; 617 struct clist *sendlist = NULL, *recvlist = NULL; 618 struct clist *cl = NULL, *cle = NULL, *rdma_reply = NULL; 619 uint_t vers, op; 620 uint_t off; 621 uint32_t xid; 622 uint32_t seg_array_len; 623 CONN *conn = NULL; 624 rdma_buf_t clmsg = {0}, rpcmsg = {0}; 625 int msglen; 626 clock_t ticks; 627 bool_t wlist_exists_reply = FALSE; 628 bool_t long_reply_buf_exists = FALSE; 629 630 struct clist *rpccall_wlist = NULL, *rpcreply_wlist = NULL, 631 long_reply_clist ={0}; 632 rpccall_read_t read_type; 633 rpccall_write_t write_type; 634 uint32_t rdma_credit = rdma_bufs_rqst; 635 struct clist long_reply_buf_clist = {0}; 636 637 RCSTAT_INCR(rccalls); 638 /* 639 * Get unique xid 640 */ 
641 if (p->cku_xid == 0) 642 p->cku_xid = alloc_xid(); 643 644 status = RDMA_GET_CONN(p->cku_rd_mod->rdma_ops, &p->cku_addr, 645 p->cku_addrfmly, p->cku_rd_handle, &conn); 646 647 if (conn == NULL) { 648 /* 649 * Connect failed to server. Could be because of one 650 * of several things. In some cases we don't want 651 * the caller to retry immediately - delay before 652 * returning to caller. 653 */ 654 switch (status) { 655 case RDMA_TIMEDOUT: 656 /* 657 * Already timed out. No need to delay 658 * some more. 659 */ 660 p->cku_err.re_status = RPC_TIMEDOUT; 661 p->cku_err.re_errno = ETIMEDOUT; 662 break; 663 case RDMA_INTR: 664 /* 665 * Failed because of an signal. Very likely 666 * the caller will not retry. 667 */ 668 p->cku_err.re_status = RPC_INTR; 669 p->cku_err.re_errno = EINTR; 670 break; 671 default: 672 /* 673 * All other failures - server down or service 674 * down or temporary resource failure. Delay before 675 * returning to caller. 676 */ 677 ticks = clnt_rdma_min_delay * drv_usectohz(1000000); 678 p->cku_err.re_status = RPC_CANTCONNECT; 679 p->cku_err.re_errno = EIO; 680 681 if (h->cl_nosignal == TRUE) { 682 delay(ticks); 683 } else { 684 if (delay_sig(ticks) == EINTR) { 685 p->cku_err.re_status = RPC_INTR; 686 p->cku_err.re_errno = EINTR; 687 } 688 } 689 break; 690 } 691 692 return (p->cku_err.re_status); 693 } 694 695 clnt_check_credit(conn); 696 697 /* 698 * Get the size of the rpc call message. Need this 699 * to determine if the rpc call message will fit in 700 * the pre-allocated RDMA buffers. If the rpc call 701 * message length is greater that the pre-allocated 702 * buffers then, it is a Long RPC. A one time use 703 * buffer is allocated and registered for the Long 704 * RPC call. 
705 */ 706 xdrs = &callxdr; 707 msglen = CKU_HDRSIZE + BYTES_PER_XDR_UNIT; 708 709 if (h->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) { 710 msglen += xdrrdma_authsize(h->cl_auth, p->cku_cred, 711 rdma_minchunk); 712 msglen += xdrrdma_sizeof(xdr_args, argsp, rdma_minchunk); 713 714 if (msglen > RPC_MSG_SZ) 715 read_type = RPCCALL_RCHUNK; 716 else 717 read_type = RPCCALL_NORCHUNK; 718 } else { 719 /* 720 * For RPCSEC_GSS since we cannot accurately presize the 721 * buffer required for encoding, we assume that its going 722 * to be a Long RPC to start with. We also create the 723 * the XDR stream with min_chunk set to 0 which instructs 724 * the XDR layer to not chunk the incoming byte stream. 725 */ 726 727 msglen += 2 * MAX_AUTH_BYTES + 2 * sizeof (struct opaque_auth); 728 msglen += xdrrdma_sizeof(xdr_args, argsp, rdma_minchunk); 729 730 if (msglen > RPC_MSG_SZ) 731 read_type = RPCCALL_RCHUNK; 732 else 733 read_type = RPCCALL_NORCHUNK; 734 } 735 736 if (read_type == RPCCALL_NORCHUNK) { 737 738 rpcmsg.type = SEND_BUFFER; 739 if (RDMA_BUF_ALLOC(conn, &rpcmsg)) { 740 cmn_err(CE_WARN, "clnt_rdma_kcallit: no buffers!"); 741 goto done; 742 } 743 } else { 744 #ifdef SERVER_REG_CACHE 745 rib_lrc_entry_t *long_reply_buf = NULL; 746 #endif 747 rpcmsg.type = CHUNK_BUFFER; 748 #ifdef SERVER_REG_CACHE 749 long_reply_buf = RDMA_GET_SERVER_CACHE_BUF(conn, msglen); 750 rpcmsg.addr = long_reply_buf->lrc_buf; 751 #else 752 rpcmsg.addr = kmem_zalloc(msglen, KM_SLEEP); 753 #endif 754 cle = (struct clist *)kmem_zalloc(sizeof (struct clist), 755 KM_SLEEP); 756 cle->c_xdroff = 0; 757 cle->c_len = rpcmsg.len = msglen; 758 cle->c_saddr = (uint64)(uintptr_t)rpcmsg.addr; 759 cle->c_next = NULL; 760 #ifdef SERVER_REG_CACHE 761 cle->long_reply_buf = (uint64)long_reply_buf; 762 #endif 763 } 764 765 op = cle ? RDMA_NOMSG : RDMA_MSG; 766 cxdrp = xdrs; 767 xdrrdma_create(xdrs, rpcmsg.addr, (cle ? 
msglen : rpcmsg.len), 768 rdma_minchunk, cle, XDR_ENCODE, NULL); 769 770 status = clnt_compose_rpcmsg(h, procnum, &rpcmsg, xdrs, xdr_args, argsp); 771 if (status != CLNT_RDMA_SUCCESS) { 772 rdma_buf_free(conn, &rpcmsg); 773 clist_free(cle); 774 p->cku_err.re_status = RPC_CANTENCODEARGS; 775 p->cku_err.re_errno = EIO; 776 cmn_err(CE_WARN, 777 "clnt_rdma_kcallit: clnt_compose_rpcmsg failed"); 778 goto done; 779 } 780 781 /* Read chunklist (a linked list of N elements, 782 * position P (same P for all chunks of same arg!): 783 * 1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0 784 */ 785 786 cl = xdrrdma_clist(xdrs); 787 788 /* 789 * Update the chunk size information for the Long RPC msg. 790 */ 791 if (cl && op == RDMA_NOMSG) 792 cl->c_len = p->cku_outsz; 793 794 /* 795 * Prepare the header for the RDMA chunk 796 */ 797 status = clnt_compose_rdma_header(conn, h, &clmsg, &xdrs, &op); 798 if (status != CLNT_RDMA_SUCCESS) { 799 p->cku_err.re_status = RPC_CANTSEND; 800 p->cku_err.re_errno = EIO; 801 rdma_buf_free(conn, &rpcmsg); 802 clist_free(cle); 803 RCSTAT_INCR(rcnomem); 804 cmn_err(CE_WARN, "clnt_rdma_kcallit: no free buffers!!"); 805 goto done; 806 } 807 808 status = clnt_setup_rlist(conn, xdrs, &cl); 809 if (status != CLNT_RDMA_SUCCESS) { 810 cmn_err(CE_WARN, "clnt_rdma_kcallit: clist register failed"); 811 rdma_buf_free(conn, &clmsg); 812 rdma_buf_free(conn, &rpcmsg); 813 clist_free(cl); 814 p->cku_err.re_status = RPC_CANTSEND; 815 p->cku_err.re_errno = EIO; 816 goto done; 817 } 818 819 /* Setup write chunk list for NFS3 READ operation 820 * Other operations will have a NULL wlist 821 */ 822 status = clnt_setup_wlist(conn, procnum, &rpccall_wlist, 823 resultsp, xdr_results, xdrs); 824 if (status != CLNT_RDMA_SUCCESS) { 825 rdma_buf_free(conn, &clmsg); 826 rdma_buf_free(conn, &rpcmsg); 827 clist_free(cl); 828 p->cku_err.re_status = RPC_CANTSEND; 829 p->cku_err.re_errno = EIO; 830 goto done; 831 } 832 833 status = clnt_setup_long_reply(conn, procnum, 
&long_reply_buf_clist, 834 xdrs, &long_reply_buf_exists); 835 if (status != CLNT_RDMA_SUCCESS) { 836 rdma_buf_free(conn, &clmsg); 837 rdma_buf_free(conn, &rpcmsg); 838 clist_free(cl); 839 p->cku_err.re_status = RPC_CANTSEND; 840 p->cku_err.re_errno = EIO; 841 goto done; 842 } 843 844 /* 845 * XDR encode the RDMA_REPLY write chunk 846 */ 847 seg_array_len = (long_reply_buf_exists ? 1:0); 848 (void) xdr_encode_reply_wchunk(xdrs, &long_reply_buf_clist, seg_array_len); 849 /* 850 * Start with the RDMA header and clist (if any) 851 */ 852 sendlist = NULL; 853 clist_add(&sendlist, 0, XDR_GETPOS(xdrs), &clmsg.handle, 854 clmsg.addr, NULL, NULL); 855 /* 856 * Put the RPC call message in the send list if small RPC 857 */ 858 if (op == RDMA_MSG) { 859 clist_add(&sendlist, 0, p->cku_outsz, &rpcmsg.handle, 860 rpcmsg.addr, NULL, NULL); 861 } else { 862 /* Long RPC already in chunk list */ 863 RCSTAT_INCR(rclongrpcs); 864 } 865 866 /* 867 * Set up a reply buffer ready for the reply 868 */ 869 status = rdma_clnt_postrecv(conn, p->cku_xid); 870 if (status != RDMA_SUCCESS) { 871 rdma_buf_free(conn, &clmsg); 872 rdma_buf_free(conn, &rpcmsg); 873 if (cl) { 874 (void) clist_deregister(conn, cl, 1); 875 clist_free(cl); 876 } 877 clist_free(sendlist); 878 p->cku_err.re_status = RPC_CANTSEND; 879 p->cku_err.re_errno = EIO; 880 goto done; 881 } 882 /* 883 * sync the memory for dma 884 */ 885 if (cl != NULL) { 886 status = clist_syncmem(conn, cl, 1); 887 if (status != RDMA_SUCCESS) { 888 rdma_buf_free(conn, &clmsg); 889 rdma_buf_free(conn, &rpcmsg); 890 (void) clist_deregister(conn, cl, 1); 891 clist_free(cl); 892 clist_free(sendlist); 893 p->cku_err.re_status = RPC_CANTSEND; 894 p->cku_err.re_errno = EIO; 895 goto done; 896 } 897 } 898 899 /* 900 * Send the call message to the server 901 */ 902 #if defined (CLNT_INTERRUPT_COAL) 903 status = RDMA_SEND_BL(conn, sendlist, p->cku_xid); 904 #else 905 status = RDMA_SEND(conn, sendlist, p->cku_xid); 906 #endif 907 if (status != RDMA_SUCCESS) { 
908 if (cl) { 909 (void) clist_deregister(conn, cl, 1); 910 clist_free(cl); 911 /* 912 * If this was a long RPC message, need 913 * to free that buffer. 914 */ 915 if (rpcmsg.type == CHUNK_BUFFER) 916 rdma_buf_free(conn, &rpcmsg); 917 } 918 clist_free(sendlist); 919 p->cku_err.re_status = RPC_CANTSEND; 920 p->cku_err.re_errno = EIO; 921 goto done; 922 } else { 923 /* 924 * RDMA plugin now owns the send msg buffers. 925 * Clear them out and don't free them here. 926 */ 927 clmsg.addr = NULL; 928 if (rpcmsg.type == SEND_BUFFER) 929 rpcmsg.addr = NULL; 930 } 931 clist_free(sendlist); 932 933 /* 934 * Recv rpc reply 935 */ 936 status = RDMA_RECV(conn, &recvlist, p->cku_xid); 937 clnt_return_credit(conn); 938 939 /* 940 * Deregister chunks sent. Do this only after the reply 941 * is received as that is a sure indication that the 942 * remote end has completed RDMA of the chunks. 943 */ 944 if (cl != NULL) { 945 /* 946 * Deregister the chunks 947 */ 948 (void) clist_deregister(conn, cl, 1); 949 clist_free(cl); 950 /* 951 * If long RPC free chunk 952 */ 953 rdma_buf_free(conn, &rpcmsg); 954 } 955 956 /* 957 * Now check recv status 958 */ 959 if (status != 0) { 960 if (status == RDMA_INTR) { 961 p->cku_err.re_status = RPC_INTR; 962 p->cku_err.re_errno = EINTR; 963 RCSTAT_INCR(rcintrs); 964 } else if (status == RPC_TIMEDOUT) { 965 p->cku_err.re_status = RPC_TIMEDOUT; 966 p->cku_err.re_errno = ETIMEDOUT; 967 RCSTAT_INCR(rctimeouts); 968 } else { 969 p->cku_err.re_status = RPC_CANTRECV; 970 p->cku_err.re_errno = EIO; 971 } 972 goto done; 973 } 974 /* 975 * Process the reply message. 976 * 977 * First the chunk list (if any) 978 */ 979 xdrs = &(p->cku_inxdr); 980 xdrmem_create(xdrs, (caddr_t)(uintptr_t)recvlist->c_saddr, 981 recvlist->c_len, XDR_DECODE); 982 /* 983 * Treat xid as opaque (xid is the first entity 984 * in the rpc rdma message). 985 */ 986 xid = *(uint32_t *)(uintptr_t)recvlist->c_saddr; 987 /* Skip xid and set the xdr position accordingly. 
*/ 988 XDR_SETPOS(xdrs, sizeof (uint32_t)); 989 (void) xdr_u_int(xdrs, &vers); 990 (void) xdr_u_int(xdrs, &rdma_credit); 991 (void) xdr_u_int(xdrs, &op); 992 (void) xdr_do_clist(xdrs, &cl); 993 clnt_update_credit(conn, rdma_credit); 994 wlist_exists_reply = FALSE; 995 if (! xdr_decode_wlist(xdrs, &rpcreply_wlist, &wlist_exists_reply)) { 996 cmn_err(CE_NOTE, 997 "clnt_rdma_kcallit: xdr_decode_wlist failed"); 998 /* XXX: what should we fail with here -- EIO? */ 999 } 1000 #ifdef RPC_RDMA_INLINE 1001 if (xdr_results == x_READ3vres) { 1002 ((READ3vres *)resultsp)->wlist = NULL; 1003 } else if (xdr_results == x_READ3uiores) { 1004 ((READ3uiores *)resultsp)->wlist = NULL; 1005 } 1006 #endif 1007 1008 if (procnum == NFSPROC3_READ) { 1009 1010 check_dereg_wlist(conn, rpccall_wlist); 1011 1012 if (wlist_exists_reply) { 1013 if (xdr_results == x_READ3vres) { 1014 ((READ3vres *)resultsp)->wlist = 1015 rpcreply_wlist; 1016 ((READ3vres *)resultsp)->wlist_len = 1017 rpcreply_wlist->c_len; 1018 } else if (xdr_results == x_READ3uiores) { 1019 ((READ3uiores *)resultsp)->wlist = 1020 rpcreply_wlist; 1021 ((READ3uiores *)resultsp)->wlist_len = 1022 rpcreply_wlist->c_len; 1023 } else { 1024 cmn_err(CE_NOTE, 1025 "unknown READ3 xdr decode fnp=%p", 1026 (void *)xdr_results); 1027 } 1028 } 1029 } else { 1030 if(wlist_exists_reply) 1031 cmn_err(CE_NOTE, 1032 "clnt_rdma_kcallit: received wlist for " 1033 "non-READ3 call. reply xdr decode fnp=%p", 1034 (void *)xdr_results); 1035 } 1036 1037 /* 1038 * The server shouldn't have sent a RDMA_SEND that 1039 * the client needs to RDMA_WRITE a reply back to 1040 * the server. So silently ignoring what the 1041 * server returns in the rdma_reply section of the 1042 * header. 
1043 */ 1044 (void) xdr_decode_reply_wchunk(xdrs, &rdma_reply,conn); 1045 off = xdr_getpos(xdrs); 1046 1047 xdrs = &replxdr; 1048 if (clnt_decode_long_reply(conn, procnum, &long_reply_buf_clist, 1049 rdma_reply, xdrs, &rxdrp, 1050 cl, recvlist, op, off) != CLNT_RDMA_SUCCESS) 1051 { 1052 goto done; 1053 } 1054 reply_msg.rm_direction = REPLY; 1055 reply_msg.rm_reply.rp_stat = MSG_ACCEPTED; 1056 reply_msg.acpted_rply.ar_stat = SUCCESS; 1057 reply_msg.acpted_rply.ar_verf = _null_auth; 1058 /* 1059 * xdr_results will be done in AUTH_UNWRAP. 1060 */ 1061 reply_msg.acpted_rply.ar_results.where = NULL; 1062 reply_msg.acpted_rply.ar_results.proc = xdr_void; 1063 1064 /* 1065 * Decode and validate the response. 1066 */ 1067 if (xdr_replymsg(xdrs, &reply_msg)) { 1068 enum clnt_stat re_status; 1069 1070 _seterr_reply(&reply_msg, &(p->cku_err)); 1071 1072 re_status = p->cku_err.re_status; 1073 if (re_status == RPC_SUCCESS) { 1074 /* 1075 * Reply is good, check auth. 1076 */ 1077 if (!AUTH_VALIDATE(h->cl_auth, 1078 &reply_msg.acpted_rply.ar_verf)) { 1079 p->cku_err.re_status = RPC_AUTHERROR; 1080 p->cku_err.re_why = AUTH_INVALIDRESP; 1081 RCSTAT_INCR(rcbadverfs); 1082 cmn_err(CE_WARN, 1083 "clnt_rdma_kcallit: AUTH_VALIDATE failed"); 1084 } else if (!AUTH_UNWRAP(h->cl_auth, xdrs, 1085 xdr_results, resultsp)) { 1086 p->cku_err.re_status = RPC_CANTDECODERES; 1087 p->cku_err.re_errno = EIO; 1088 cmn_err(CE_WARN, 1089 "clnt_rdma_kcallit: AUTH_UNWRAP failed"); 1090 } 1091 } else { 1092 /* set errno in case we can't recover */ 1093 if (re_status != RPC_VERSMISMATCH && 1094 re_status != RPC_AUTHERROR && 1095 re_status != RPC_PROGVERSMISMATCH) 1096 p->cku_err.re_errno = EIO; 1097 1098 if (re_status == RPC_AUTHERROR) { 1099 /* 1100 * Map recoverable and unrecoverable 1101 * authentication errors to appropriate 1102 * errno 1103 */ 1104 switch (p->cku_err.re_why) { 1105 case AUTH_BADCRED: 1106 case AUTH_BADVERF: 1107 case AUTH_INVALIDRESP: 1108 case AUTH_TOOWEAK: 1109 case AUTH_FAILED: 
				case RPCSEC_GSS_NOCRED:
				case RPCSEC_GSS_FAILED:
					p->cku_err.re_errno = EACCES;
					break;
				case AUTH_REJECTEDCRED:
				case AUTH_REJECTEDVERF:
				default:
					p->cku_err.re_errno = EIO;
					break;
				}
				RPCLOG(1, "clnt_rdma_kcallit : "
				    "authentication failed with "
				    "RPC_AUTHERROR of type %d\n",
				    p->cku_err.re_why);
			}
			cmn_err(CE_WARN,
			    "clnt_rdma_kcallit: RPC failed");

		}
	} else {
		p->cku_err.re_status = RPC_CANTDECODERES;
		p->cku_err.re_errno = EIO;
		cmn_err(CE_WARN, "clnt_rdma_kcallit: xdr_replymsg failed");
	}

	/*
	 * If rpc reply is in a chunk, free it now.
	 */
done:
	if (long_reply_buf_exists){
		(void) clist_deregister(conn, &long_reply_buf_clist, 0);
#ifndef SERVER_REG_CACHE
		/* Buffer was kmem_alloc'd for the long reply; free it. */
		kmem_free((void *)long_reply_buf_clist.c_daddr,
		    (size_t)long_reply_buf_clist.c_len);
#else
		/* Buffer came from the server registration cache. */
		RDMA_FREE_SERVER_CACHE_BUF(conn,
		    (rib_lrc_entry_t *)long_reply_buf_clist.long_reply_buf);
#endif
	}
	/* Tear down the call-side and (if set) reply-side XDR streams. */
	if (cxdrp)
		XDR_DESTROY(cxdrp);
	if (rxdrp) {
		(void) xdr_rpc_free_verifier(rxdrp, &reply_msg);
		XDR_DESTROY(rxdrp);
	}

	/* Return the receive buffer to the transport and free its clist. */
	if (recvlist) {
		rdma_buf_t recvmsg = {0};
		recvmsg.addr = (caddr_t)(uintptr_t)recvlist->c_saddr;
		recvmsg.type = RECV_BUFFER;
		RDMA_BUF_FREE(conn, &recvmsg);
		clist_free(recvlist);
	}
#if (!defined(ASYNC_CLIENT_DEREG))
	/*
	 * Synchronous dereg path: the call wlist was deregistered in
	 * check_dereg_wlist, so the clist itself is freed here.  In the
	 * async path the queued entry owns it instead.
	 */
	if(rpccall_wlist){
		kmem_free(rpccall_wlist, sizeof(clist));
	}
#endif

	RDMA_REL_CONN(conn);
	if (p->cku_err.re_status != RPC_SUCCESS) {
		RCSTAT_INCR(rcbadcalls);
	}
	return (p->cku_err.re_status);
}

/*
 * Point the reply XDR stream (xdrs) at the right reply payload:
 *  - RDMA_NOMSG with a long-reply buffer: decode from the buffer the
 *    server RDMA_WRITE'd into (only valid for READDIR/READDIRPLUS/
 *    READLINK);
 *  - otherwise: decode inline from the receive buffer, starting past
 *    the transport header (off).
 * On the paths that build a stream, *rxdrp is set to xdrs so the caller
 * can destroy it at "done:"; on the rejected paths *rxdrp is NULL (or
 * left untouched in the READ-list case).
 *
 * NOTE(review): the error branches xdrmem_destroy() the stream but
 * still return CLNT_RDMA_SUCCESS, so the caller goes on to run
 * xdr_replymsg() against a destroyed/uninitialized stream -- confirm
 * whether these should return a failure code instead.
 */
static int clnt_decode_long_reply(CONN *conn, rpcproc_t procnum,
			struct clist *long_reply_buf_clist,
			struct clist *rdma_reply, XDR *xdrs,
			XDR **rxdrp, struct clist *cl,
			struct clist *recvlist,
			uint_t op,uint_t off)
{
	if ( RDMA_NOMSG == op &&
	    long_reply_buf_clist->c_daddr) {
		if (procnum == NFSPROC3_READDIR ||
		    procnum == NFSPROC3_READDIRPLUS ||
		    procnum == NFSPROC3_READLINK) {
			/* Reply lives in the long-reply chunk. */
			xdrmem_destroy(xdrs);
			xdrrdma_create(xdrs,
			    (caddr_t)long_reply_buf_clist->c_daddr,
			    rdma_reply->c_len,
			    0,
			    NULL,
			    XDR_DECODE,
			    conn);

			*rxdrp = xdrs;
		} else {
			/* Server sent a wchunk for a proc that never
			 * uses one. */
			cmn_err(CE_NOTE, "clnt_rdma_kcallit: "
			    "wchunk buffer for wrong nfs proc");
			xdrmem_destroy(xdrs);
			*rxdrp = NULL;
		}
	} else if (cl && RDMA_NOMSG == op) {
		/* A READ list in a reply is a protocol violation. */
		cmn_err(CE_NOTE, "clnt_rdma_kcallit: "
		    "Server sent a READ list in the RPC Reply");
		xdrmem_destroy(xdrs);
	} else {
		/* Inline reply: decode from the recv buffer past the
		 * transport header. */
		xdrmem_destroy(xdrs);
		xdrrdma_create(xdrs,
		    (caddr_t)(uintptr_t)(recvlist->c_saddr + off),
		    recvlist->c_len - off, 0, cl, XDR_DECODE, conn);
		*rxdrp = xdrs;
	}
	return CLNT_RDMA_SUCCESS;
}

#ifdef DYNAMIC_CREDIT_CONTROL
/*
 * If the remaining headroom (granted minus in-flight) has dropped below
 * CLNT_CREDIT_LOW, raise the credit value we will request from the
 * server on the next call.
 */
static void clnt_compute_credit(CONN *conn, uint32_t *rdma_credit)
{
	rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc;

	mutex_enter(&conn->c_lock);
	if(cc_info->clnt_cc_granted_ops - cc_info->clnt_cc_in_flight_ops < CLNT_CREDIT_LOW)
		*rdma_credit = rdma_bufs_rqst + cc_info->clnt_cc_in_flight_ops / 2;
	mutex_exit(&conn->c_lock);
}
#endif

/*
 * A call has completed: give its credit back and wake one waiter
 * blocked in clnt_check_credit.
 */
static void clnt_return_credit(CONN *conn)
{
	rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc;

	mutex_enter(&conn->c_lock);
	cc_info->clnt_cc_in_flight_ops--;
	cv_signal(&cc_info->clnt_cc_cv);
	mutex_exit(&conn->c_lock);
}

static void clnt_update_credit(CONN *conn, uint32_t rdma_credit)
{
	rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc;

	/*
	 * Get the granted number of buffers for credit control.
1244 */ 1245 mutex_enter(&conn->c_lock); 1246 cc_info->clnt_cc_granted_ops = rdma_credit; 1247 mutex_exit(&conn->c_lock); 1248 } 1249 1250 static void clnt_check_credit(CONN *conn) 1251 { 1252 rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc; 1253 1254 /* 1255 * Make sure we are not going over our allowed buffer use 1256 * (and make sure we have gotten a granted value before). 1257 */ 1258 mutex_enter(&conn->c_lock); 1259 while (cc_info->clnt_cc_in_flight_ops >= cc_info->clnt_cc_granted_ops 1260 && cc_info->clnt_cc_granted_ops != 0) { 1261 /* 1262 * Client has maxed out its granted buffers due to 1263 * credit control. Current handling is to block and wait. 1264 */ 1265 cv_wait(&cc_info->clnt_cc_cv, &conn->c_lock); 1266 } 1267 cc_info->clnt_cc_in_flight_ops++; 1268 mutex_exit(&conn->c_lock); 1269 } 1270 1271 /* ARGSUSED */ 1272 static void 1273 clnt_rdma_kabort(CLIENT *h) 1274 { 1275 } 1276 1277 static void 1278 clnt_rdma_kerror(CLIENT *h, struct rpc_err *err) 1279 { 1280 struct cku_private *p = htop(h); 1281 1282 *err = p->cku_err; 1283 } 1284 1285 static bool_t 1286 clnt_rdma_kfreeres(CLIENT *h, xdrproc_t xdr_res, caddr_t res_ptr) 1287 { 1288 struct cku_private *p = htop(h); 1289 XDR *xdrs; 1290 1291 xdrs = &(p->cku_outxdr); 1292 xdrs->x_op = XDR_FREE; 1293 return ((*xdr_res)(xdrs, res_ptr)); 1294 } 1295 1296 /* ARGSUSED */ 1297 static bool_t 1298 clnt_rdma_kcontrol(CLIENT *h, int cmd, char *arg) 1299 { 1300 return (TRUE); 1301 } 1302 1303 /* ARGSUSED */ 1304 static int 1305 clnt_rdma_ksettimers(CLIENT *h, struct rpc_timers *t, struct rpc_timers *all, 1306 int minimum, void(*feedback)(int, int, caddr_t), caddr_t arg, 1307 uint32_t xid) 1308 { 1309 RCSTAT_INCR(rctimers); 1310 return (0); 1311 } 1312 1313 int 1314 rdma_reachable(int addr_type, struct netbuf *addr, struct knetconfig **knconf) 1315 { 1316 rdma_registry_t *rp; 1317 void *handle = NULL; 1318 struct knetconfig *knc; 1319 char *pf, *p; 1320 rdma_stat status; 1321 int error = 0; 
1322 1323 mutex_enter(&rdma_modload_lock); 1324 error = rdma_modload(); 1325 mutex_exit(&rdma_modload_lock); 1326 1327 if (!INGLOBALZONE(curproc)) 1328 return (-1); 1329 /* 1330 * modload the RDMA plugins if not already done. 1331 */ 1332 if (!rdma_modloaded) { 1333 mutex_enter(&rdma_modload_lock); 1334 if (!rdma_modloaded) { 1335 error = rdma_modload(); 1336 } 1337 mutex_exit(&rdma_modload_lock); 1338 if (error) 1339 return (-1); 1340 } 1341 1342 if (!rdma_dev_available) 1343 return (-1); 1344 1345 rw_enter(&rdma_lock, RW_READER); 1346 rp = rdma_mod_head; 1347 while (rp != NULL) { 1348 status = RDMA_REACHABLE(rp->r_mod->rdma_ops, addr_type, addr, 1349 &handle); 1350 if (status == RDMA_SUCCESS) { 1351 knc = kmem_zalloc(sizeof (struct knetconfig), 1352 KM_SLEEP); 1353 knc->knc_semantics = NC_TPI_RDMA; 1354 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 1355 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 1356 if (addr_type == AF_INET) 1357 (void) strncpy(pf, NC_INET, KNC_STRSIZE); 1358 else if (addr_type == AF_INET6) 1359 (void) strncpy(pf, NC_INET6, KNC_STRSIZE); 1360 pf[KNC_STRSIZE - 1] = '\0'; 1361 1362 (void) strncpy(p, rp->r_mod->rdma_api, KNC_STRSIZE); 1363 p[KNC_STRSIZE - 1] = '\0'; 1364 1365 knc->knc_protofmly = pf; 1366 knc->knc_proto = p; 1367 knc->knc_rdev = (dev_t)handle; 1368 *knconf = knc; 1369 rw_exit(&rdma_lock); 1370 return (0); 1371 } 1372 rp = rp->r_next; 1373 } 1374 rw_exit(&rdma_lock); 1375 return (-1); 1376 } 1377 1378 static void 1379 check_dereg_wlist(CONN *conn, clist *rwc) 1380 { 1381 if (rwc == NULL) 1382 return; 1383 1384 if (rwc) { 1385 if (rwc->c_dmemhandle.mrc_rmr && rwc->c_len) { 1386 int status; 1387 #if defined(ASYNC_CLIENT_DEREG) 1388 /* Add in an entry to rqueue */ 1389 INSERT_QUEUE(conn, rwc); 1390 #else 1391 status = clist_deregister(conn, rwc, FALSE); 1392 if (status != RDMA_SUCCESS) { 1393 cmn_err(CE_NOTE, "dereg_wlist failed." 1394 "status=%d", status); 1395 } 1396 #endif 1397 } 1398 1399 } 1400 }--- EOF ---