/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */


/* Copyright (c) 2006, The Ohio State University. All rights reserved.
 *
 * Portions of this source code are developed by the team members of
 * The Ohio State University's Network-Based Computing Laboratory (NBCL),
 * headed by Professor Dhabaleswar K. (DK) Panda.
 *
 * Acknowledgements to contributions from developers:
 *	Ranjit Noronha:	noronha@cse.ohio-state.edu
 *	Lei Chai:	chail@cse.ohio-state.edu
 *	Weikuan Yu:	yuw@cse.ohio-state.edu
 *
 */

#pragma ident	"@(#)rpcib.c	1.29	06/01/25 SMI"

/*
 * The rpcib plugin. Implements the interface for RDMATF's
 * interaction with IBTF.
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/user.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/file.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/stropts.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/systm.h>
#include <sys/pathname.h>
#include <sys/kstat.h>
#include <sys/t_lock.h>
#include <sys/ddi.h>
#include <sys/cmn_err.h>
#include <sys/time.h>
#include <sys/isa_defs.h>
#include <sys/callb.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>

/* #define	IB_FMR_SUP */
/* #define	CLNT_POLL_CQ */
#include <sys/ib/ibtl/ibti.h>
#include <rpc/rpc.h>
#include <rpc/ib.h>

#include <sys/modctl.h>

#include <sys/pathname.h>
#include <sys/kstr.h>
#include <sys/sockio.h>
#include <sys/vnode.h>
#include <sys/tiuser.h>
#include <net/if.h>
#include <sys/cred.h>
#include <rpc/rpc_rdma.h>

int num_clients = 0;
volatile uint32_t is_server = 0;

extern char *inet_ntop(int, const void *, char *, int);


/*
 * Prototype declarations for driver ops
 */

static int	rpcib_attach(dev_info_t *, ddi_attach_cmd_t);
static int	rpcib_getinfo(dev_info_t *, ddi_info_cmd_t,
			void *, void **);
static int	rpcib_detach(dev_info_t *, ddi_detach_cmd_t);


/* rpcib cb_ops */
static struct cb_ops rpcib_cbops = {
	nulldev,		/* open */
	nulldev,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	nodev,			/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* stream */
	D_MP,			/* cb_flag */
	CB_REV,			/* rev */
	nodev,			/* int (*cb_aread)() */
	nodev			/* int (*cb_awrite)() */
};



/*
 * Device options
 */
static struct dev_ops rpcib_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	rpcib_getinfo,		/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	rpcib_attach,		/* attach */
	rpcib_detach,		/* detach */
	nodev,			/* reset */
	&rpcib_cbops,		/* driver ops - devctl interfaces */
	NULL,			/* bus operations */
	NULL			/* power */
};

/*
 * Module linkage information.
 */

static struct modldrv rib_modldrv = {
	&mod_driverops,			/* Driver module */
	"RPCIB plugin driver, ver 1.29", /* Driver name and version */
	&rpcib_ops,			/* Driver ops */
};

static struct modlinkage rib_modlinkage = {
	MODREV_1,
	(void *)&rib_modldrv,
	NULL
};

#ifdef SERVER_REG_CACHE
typedef struct cache_struct {
	avl_node_t		avl_link;
	rib_lrc_entry_t		r;
	uint32_t		len;
	uint32_t		elements;
	kmutex_t		node_lock;
} cache_avl_struct_t;


#if 1
int rib_total_buffers = 0;
#endif
#endif
/*
 * rib_stat: private data pointer used when registering
 *	with the IBTF. It is returned to the consumer
 *	in all callbacks.
 */
static rpcib_state_t *rib_stat = NULL;

#define	RNR_RETRIES	IBT_RNR_INFINITE_RETRY
#define	MAX_PORTS	2

#ifdef IB_FMR_SUP
#define	IB_FMR_DIRTY_MARK	32
#define	IB_FMR_MAX_SIZE		1048576
/* #define	IB_FMR_MAX_SIZE	32768 */
#endif

int preposted_rbufs = RDMA_BUFS_GRANT;
int send_threshold = 1;

/*
 * State of the plugin.
 * ACCEPT = accepting new connections and requests.
 * NO_ACCEPT = not accepting new connections and requests.
 * This should eventually move to the rpcib_state_t structure, since this
 * will tell in which state the plugin is for a particular type of service
 * like NFS, NLM or the v4 Callback daemon. The plugin might be in accept
 * state for one and in no_accept state for the other.
 */
int plugin_state;
kmutex_t plugin_state_lock;


/*
 * RPCIB RDMATF operations
 */
#if defined(MEASURE_POOL_DEPTH)
static void rib_posted_rbufs(uint32_t x) { return; }
#endif
static rdma_stat rib_reachable(int addr_type, struct netbuf *, void **handle);
static rdma_stat rib_disconnect(CONN *conn);
static void rib_listen(struct rdma_svc_data *rd);
static void rib_listen_stop(struct rdma_svc_data *rd);
static rdma_stat rib_registermem(CONN *conn, caddr_t adsp, caddr_t buf,
	uint_t buflen, struct mrc *buf_handle);
static rdma_stat rib_deregistermem(CONN *conn, caddr_t buf,
	struct mrc buf_handle);
static rdma_stat rib_registermem_via_hca(rib_hca_t *hca, caddr_t adsp,
	caddr_t buf, uint_t buflen, struct mrc *buf_handle);
static rdma_stat rib_deregistermem_via_hca(rib_hca_t *hca, caddr_t buf,
	struct mrc buf_handle);
#ifdef SERVER_REG_CACHE
static rdma_stat rib_registermemsync(CONN *conn, caddr_t adsp, caddr_t buf,
	uint_t buflen, struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle,
	void *lrc);
static rdma_stat rib_deregistermemsync(CONN *conn, caddr_t buf,
	struct mrc buf_handle, RIB_SYNCMEM_HANDLE sync_handle, void *);
#else
static rdma_stat rib_registermemsync(CONN *conn, caddr_t adsp, caddr_t buf,
	uint_t buflen, struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle);
static rdma_stat rib_deregistermemsync(CONN *conn, caddr_t buf,
	struct mrc buf_handle, RIB_SYNCMEM_HANDLE sync_handle);

#endif
static rdma_stat rib_syncmem(CONN *conn, RIB_SYNCMEM_HANDLE shandle,
	caddr_t buf, int len, int cpu);

static rdma_stat rib_reg_buf_alloc(CONN *conn, rdma_buf_t *rdbuf);

static void rib_reg_buf_free(CONN *conn, rdma_buf_t *rdbuf);
static void *rib_rbuf_alloc(CONN *, rdma_buf_t *);

static void rib_rbuf_free(CONN *conn, int ptype, void *buf);

static rdma_stat rib_send(CONN *conn, struct clist *cl, uint32_t msgid);
#if defined(CLNT_INTERRUPT_COAL)
static void rib_scq_free(caddr_t);
static rdma_stat rib_send_bl(CONN *conn, struct clist *cl, uint32_t msgid);
#endif
#if defined(ASYNC_SERVER_DEREG)
static rdma_stat rib_send_nw(CONN *conn, struct clist *cl, uint32_t msgid,
	caddr_t, caddr_t, int, caddr_t, int, int, int);
#endif
#if defined(ASYNC_CLIENT_DEREG)
static void insert_queue(CONN *conn, struct clist *rwc);
#endif
static rdma_stat rib_send_resp(CONN *conn, struct clist *cl, uint32_t msgid);
static rdma_stat rib_post_resp(CONN *conn, struct clist *cl, uint32_t msgid);
static rdma_stat rib_post_recv(CONN *conn, struct clist *cl);
static rdma_stat rib_recv(CONN *conn, struct clist **clp, uint32_t msgid);
static rdma_stat rib_read(CONN *conn, struct clist *cl, int wait);
static rdma_stat rib_write(CONN *conn, struct clist *cl, int wait);
static rdma_stat rib_ping_srv(int addr_type, struct netbuf *, rib_hca_t **);
static rdma_stat rib_conn_get(struct netbuf *, int addr_type, void *, CONN **);
static rdma_stat rib_conn_release(CONN *conn);
static rdma_stat rib_getinfo(rdma_info_t *info);
#ifdef DYNAMIC_CREDIT_CONTROL
void rib_get_resource_info(CONN *, int *, int *);
#endif

#ifdef SERVER_REG_CACHE
static rib_lrc_entry_t *rib_get_server_cache_buf(CONN *conn, uint32_t len);
static void rib_free_server_cache_buf(CONN *conn, rib_lrc_entry_t *buf);
static void rib_destroy_cache(rib_hca_t *hca);
static void rib_server_side_cache_reclaim(void *argp);
static int avl_compare(const void *t1, const void *t2);
#endif

static rdma_stat rib_register_ats(rib_hca_t *);
static void rib_deregister_ats();
static void rib_stop_services(rib_hca_t *);

/*
 * RPCIB addressing operations
 */
char **get_ip_addrs(int *count);
int get_interfaces(TIUSER *tiptr, int *num);
int find_addrs(TIUSER *tiptr, char **addrs, int num_ifs);
int get_ibd_ipaddr(rpcib_ibd_insts_t *);
rpcib_ats_t *get_ibd_entry(ib_gid_t *, ib_pkey_t, rpcib_ibd_insts_t *);
void rib_get_ibd_insts(rpcib_ibd_insts_t *);
#if defined(ASYNC_SERVER_DEREG) || defined(ASYNC_CLIENT_DEREG)
static int clist_deregister1(CONN *, struct clist *, bool_t);
#endif

#if defined(ASYNC_CLIENT_DEREG)
typedef struct async_dereg {
	struct async_dereg	*forw;
	struct async_dereg	*back;
	CONN			c_conn;
	struct clist		c_clist;
} ASYNC;
static void async_dereg_thread(caddr_t arg);
extern pri_t minclsyspri;	/* priority for taskq */
static ASYNC rqueue;
static kmutex_t at_mutex;
static kcondvar_t at_cond;
#endif
/*
 * RDMA operations the RPCIB module exports
 */
static rdmaops_t rib_ops = {
	rib_reachable,
	rib_conn_get,
	rib_conn_release,
	rib_listen,
	rib_listen_stop,
	rib_registermem,
	rib_deregistermem,
	rib_registermemsync,
	rib_deregistermemsync,
	rib_syncmem,
	rib_reg_buf_alloc,
	rib_reg_buf_free,
	rib_send,
#if defined(CLNT_INTERRUPT_COAL)
	rib_send_bl,
#endif
#if defined(ASYNC_SERVER_DEREG)
	rib_send_nw,
#endif
	rib_send_resp,
	rib_post_resp,
	rib_post_recv,
	rib_recv,
	rib_read,
	rib_write,
	rib_getinfo,
#ifdef SERVER_REG_CACHE
	rib_get_server_cache_buf,
	rib_free_server_cache_buf,
#endif
#ifdef DYNAMIC_CREDIT_CONTROL
	rib_get_resource_info,
#endif
#if defined(ASYNC_CLIENT_DEREG)
	insert_queue,
#endif
};

/*
 * RDMATF RPCIB plugin details
 */
static rdma_mod_t rib_mod = {
	"ibtf",		/* api name */
	RDMATF_VERS_1,
	0,
	&rib_ops,	/* rdma op vector for ibtf */
};

static rdma_stat open_hcas(rpcib_state_t *);
static rdma_stat rib_qp_init(rib_qp_t *, int);
static void rib_svc_scq_handler(ibt_cq_hdl_t, void *);
static void rib_clnt_scq_handler(ibt_cq_hdl_t, void *);
static void rib_clnt_rcq_handler(ibt_cq_hdl_t, void *);
static void rib_svc_rcq_handler(ibt_cq_hdl_t, void *);
static rib_bufpool_t *rib_rbufpool_create(rib_hca_t *hca, int ptype, int num);
#ifdef IB_FMR_SUP
static rdma_stat rib_reg_mem_fmr(rib_hca_t *, caddr_t adsp, caddr_t, uint_t,
	ibt_mr_flags_t, ibt_mr_hdl_t *, ibt_ma_hdl_t *, ibt_pmr_desc_t *);
#endif
static rdma_stat rib_reg_mem(rib_hca_t *, caddr_t adsp, caddr_t, uint_t,
	ibt_mr_flags_t, ibt_mr_hdl_t *, ibt_mr_desc_t *);
static rdma_stat rib_reg_mem_user(rib_hca_t *, caddr_t, uint_t, ibt_mr_flags_t,
	ibt_mr_hdl_t *, ibt_mr_desc_t *, caddr_t);
static rdma_stat rib_conn_to_srv(rib_hca_t *, rib_qp_t *, ibt_path_info_t *);
static rdma_stat rib_clnt_create_chan(rib_hca_t *, struct netbuf *,
	rib_qp_t **);
static rdma_stat rib_svc_create_chan(rib_hca_t *, caddr_t, uint8_t,
	rib_qp_t **);
static rdma_stat rib_sendwait(rib_qp_t *, struct send_wid *);
static struct send_wid *rib_init_sendwait(uint32_t, int, rib_qp_t *);
static int rib_free_sendwait(struct send_wid *);
static struct rdma_done_list *rdma_done_add(rib_qp_t *qp, uint32_t xid);
static void rdma_done_rm(rib_qp_t *qp, struct rdma_done_list *rd);
static void rdma_done_rem_list(rib_qp_t *);
static void rdma_done_notify(rib_qp_t *qp, uint32_t xid);

static void rib_async_handler(void *,
	ibt_hca_hdl_t, ibt_async_code_t, ibt_async_event_t *);
static rdma_stat rib_rem_rep(rib_qp_t *, struct reply *);
static struct svc_recv *rib_init_svc_recv(rib_qp_t *, ibt_wr_ds_t *);
static int rib_free_svc_recv(struct svc_recv *);
static struct recv_wid *rib_create_wid(rib_qp_t *, ibt_wr_ds_t *, uint32_t);
static void rib_free_wid(struct recv_wid *);
static rdma_stat rib_disconnect_channel(CONN *, rib_conn_list_t *);
static void rib_detach_hca(rib_hca_t *);
static rdma_stat rib_chk_srv_ats(rib_hca_t *, struct netbuf *, int,
	ibt_path_info_t *);

/*
 * Registration with IBTF as a consumer
 */
static struct ibt_clnt_modinfo_s rib_modinfo = {
	IBTI_V2,
	IBT_GENERIC,
	rib_async_handler,	/* async event handler */
	NULL,			/* Memory Region Handler */
	"nfs/ib"
};

/*
 * Global structure
 */

typedef struct rpcib_s {
	dev_info_t	*rpcib_dip;
	kmutex_t	rpcib_mutex;
} rpcib_t;

rpcib_t rpcib;

/*
 * /etc/system controlled variable to control
 * debugging in the rpcib kernel module.
 * Set it to values greater than 1 to control
 * the amount of debugging messages printed.
 */
int rib_debug = 0;
#if defined(CLNT_POLL_CQ)
int max_poll_count = 500;
#endif
static int ats_running = 0;


int
_init(void)
{
	int error;

	error = mod_install((struct modlinkage *)&rib_modlinkage);
	if (error != 0) {
		/*
		 * Could not load module
		 */
		return (error);
	}
	mutex_init(&plugin_state_lock, NULL, MUTEX_DRIVER, NULL);

	return (0);
}

int
_fini()
{
	int status;

	if ((status = rdma_unregister_mod(&rib_mod)) != RDMA_SUCCESS) {
		return (EBUSY);
	}

	rib_deregister_ats();

	/*
	 * Remove module
	 */
	if ((status = mod_remove(&rib_modlinkage)) != 0) {
		(void) rdma_register_mod(&rib_mod);
		return (status);
	}
	mutex_destroy(&plugin_state_lock);
	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&rib_modlinkage, modinfop));
}


/*
 * rpcib_getinfo()
 * Given the device number, return the devinfo pointer or the
 * instance number.
 * Note: always succeed DDI_INFO_DEVT2INSTANCE, even before attach.
 */

/*ARGSUSED*/
static int
rpcib_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
{
	int ret = DDI_SUCCESS;

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if (rpcib.rpcib_dip != NULL)
			*result = rpcib.rpcib_dip;
		else {
			*result = NULL;
			ret = DDI_FAILURE;
		}
		break;

	case DDI_INFO_DEVT2INSTANCE:
		*result = NULL;
		break;

	default:
		ret = DDI_FAILURE;
	}
	return (ret);
}

static int
rpcib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	ibt_status_t	ibt_status;
	rdma_stat	r_status;

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	mutex_init(&rpcib.rpcib_mutex, NULL, MUTEX_DRIVER, NULL);

	mutex_enter(&rpcib.rpcib_mutex);
	if (rpcib.rpcib_dip != NULL) {
		mutex_exit(&rpcib.rpcib_mutex);
		return (DDI_FAILURE);
	}
	rpcib.rpcib_dip = dip;
	mutex_exit(&rpcib.rpcib_mutex);
	/*
	 * Create the "rpcib" minor-node.
	 */
	if (ddi_create_minor_node(dip,
	    "rpcib", S_IFCHR, 0, DDI_PSEUDO, 0) != DDI_SUCCESS) {
		/* Error message, no cmn_err as they print on console */
		return (DDI_FAILURE);
	}

	if (rib_stat == NULL) {
		rib_stat = kmem_zalloc(sizeof (*rib_stat), KM_SLEEP);
		mutex_init(&rib_stat->open_hca_lock, NULL, MUTEX_DRIVER, NULL);
	}

	rib_stat->hca_count = ibt_get_hca_list(&rib_stat->hca_guids);
	if (rib_stat->hca_count < 1) {
		mutex_destroy(&rib_stat->open_hca_lock);
		kmem_free(rib_stat, sizeof (*rib_stat));
		rib_stat = NULL;
		return (DDI_FAILURE);
	}

	ibt_status = ibt_attach(&rib_modinfo, dip,
	    (void *)rib_stat, &rib_stat->ibt_clnt_hdl);
	if (ibt_status != IBT_SUCCESS) {
		ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count);
		mutex_destroy(&rib_stat->open_hca_lock);
		kmem_free(rib_stat, sizeof (*rib_stat));
		rib_stat = NULL;
		return (DDI_FAILURE);
	}

	mutex_enter(&rib_stat->open_hca_lock);
	if (open_hcas(rib_stat) != RDMA_SUCCESS) {
		ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count);
		(void) ibt_detach(rib_stat->ibt_clnt_hdl);
		mutex_exit(&rib_stat->open_hca_lock);
		mutex_destroy(&rib_stat->open_hca_lock);
		kmem_free(rib_stat, sizeof (*rib_stat));
		rib_stat = NULL;
		return (DDI_FAILURE);
	}
	mutex_exit(&rib_stat->open_hca_lock);

	/*
	 * Register with rdmatf
	 */
	rib_mod.rdma_count = rib_stat->hca_count;
	r_status = rdma_register_mod(&rib_mod);
	if (r_status != RDMA_SUCCESS && r_status != RDMA_REG_EXIST) {
		rib_detach_hca(rib_stat->hca);
		ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count);
		(void) ibt_detach(rib_stat->ibt_clnt_hdl);
		mutex_destroy(&rib_stat->open_hca_lock);
		kmem_free(rib_stat, sizeof (*rib_stat));
		rib_stat = NULL;
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}

/*ARGSUSED*/
static int
rpcib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	switch (cmd) {

	case DDI_DETACH:
		break;

	case DDI_SUSPEND:
	default:
		return (DDI_FAILURE);
	}

	/*
	 * Detach the hca and free resources
	 */
	mutex_enter(&plugin_state_lock);
	plugin_state = NO_ACCEPT;
	mutex_exit(&plugin_state_lock);
	rib_detach_hca(rib_stat->hca);
	ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count);
	(void) ibt_detach(rib_stat->ibt_clnt_hdl);

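	/*
	 * Clear the cached dip under rpcib_mutex so that a future
	 * attach can claim the instance again.
	 */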
mutex_enter(&rpcib.rpcib_mutex); 633 rpcib.rpcib_dip = NULL; 634 mutex_exit(&rpcib.rpcib_mutex); 635 636 mutex_destroy(&rpcib.rpcib_mutex); 637 return (DDI_SUCCESS); 638 } 639 640 641 static void 642 rib_deregister_ats() 643 { 644 rib_hca_t *hca; 645 rib_service_t *srv_list, *to_remove; 646 ibt_status_t ibt_status; 647 648 /* 649 * deregister the Address Translation Service. 650 */ 651 hca = rib_stat->hca; 652 rw_enter(&hca->service_list_lock, RW_WRITER); 653 srv_list = hca->ats_list; 654 while (srv_list != NULL) { 655 to_remove = srv_list; 656 srv_list = to_remove->srv_next; 657 658 ibt_status = ibt_deregister_ar(hca->ibt_clnt_hdl, 659 &to_remove->srv_ar); 660 if (ibt_status != IBT_SUCCESS) { 661 #ifdef DEBUG 662 if (rib_debug) { 663 cmn_err(CE_WARN, "_fini: " 664 "ibt_deregister_ar FAILED" 665 " status: %d", ibt_status); 666 } 667 #endif 668 } else { 669 mutex_enter(&rib_stat->open_hca_lock); 670 ats_running = 0; 671 mutex_exit(&rib_stat->open_hca_lock); 672 #ifdef DEBUG 673 if (rib_debug) { 674 675 cmn_err(CE_NOTE, "_fini: " 676 "Successfully unregistered" 677 " ATS service: %s", 678 to_remove->srv_name); 679 } 680 #endif 681 } 682 kmem_free(to_remove, sizeof (rib_service_t)); 683 } 684 hca->ats_list = NULL; 685 rw_exit(&hca->service_list_lock); 686 } 687 688 static void rib_rbufpool_free(rib_hca_t *, int); 689 static void rib_rbufpool_deregister(rib_hca_t *, int); 690 static void rib_rbufpool_destroy(rib_hca_t *hca, int ptype); 691 static struct reply *rib_addreplylist(rib_qp_t *, uint32_t); 692 static rdma_stat rib_rem_replylist(rib_qp_t *); 693 static int rib_remreply(rib_qp_t *, struct reply *); 694 static rdma_stat rib_add_connlist(CONN *, rib_conn_list_t *); 695 static rdma_stat rib_rm_conn(CONN *, rib_conn_list_t *); 696 697 698 /* 699 * One CQ pair per HCA 700 */ 701 static rdma_stat 702 rib_create_cq(rib_hca_t *hca, uint32_t cq_size, ibt_cq_handler_t cq_handler, 703 rib_cq_t **cqp, rpcib_state_t *ribstat) 704 { 705 rib_cq_t *cq; 706 ibt_cq_attr_t cq_attr; 707 uint32_t real_size; 708 ibt_status_t status; 709 rdma_stat error = RDMA_SUCCESS; 710 711 cq = kmem_zalloc(sizeof (rib_cq_t), KM_SLEEP); 712 cq->rib_hca = hca; 713 cq_attr.cq_size = cq_size; 714 cq_attr.cq_flags = IBT_CQ_NO_FLAGS; 715 status = ibt_alloc_cq(hca->hca_hdl, &cq_attr, &cq->rib_cq_hdl, 716 &real_size); 717 if (status != IBT_SUCCESS) { 718 cmn_err(CE_WARN, "rib_create_cq: ibt_alloc_cq() failed," 719 " status=%d", status); 720 error = RDMA_FAILED; 721 goto fail; 722 } 723 ibt_set_cq_handler(cq->rib_cq_hdl, cq_handler, ribstat); 724 725 /* 726 * Enable CQ callbacks. CQ Callbacks are single shot 727 * (e.g. you have to call ibt_enable_cq_notify() 728 * after each callback to get another one). 
729 */ 730 status = ibt_enable_cq_notify(cq->rib_cq_hdl, IBT_NEXT_COMPLETION); 731 if (status != IBT_SUCCESS) { 732 cmn_err(CE_WARN, "rib_create_cq: " 733 "enable_cq_notify failed, status %d", status); 734 error = RDMA_FAILED; 735 goto fail; 736 } 737 *cqp = cq; 738 739 return (error); 740 fail: 741 if (cq->rib_cq_hdl) 742 (void) ibt_free_cq(cq->rib_cq_hdl); 743 if (cq) 744 kmem_free(cq, sizeof (rib_cq_t)); 745 return (error); 746 } 747 748 static rdma_stat 749 open_hcas(rpcib_state_t *ribstat) 750 { 751 rib_hca_t *hca; 752 ibt_status_t ibt_status; 753 rdma_stat status; 754 ibt_hca_portinfo_t *pinfop; 755 ibt_pd_flags_t pd_flags = IBT_PD_NO_FLAGS; 756 uint_t size, cq_size; 757 int i; 758 #ifdef IB_FMR_SUP 759 ibt_fmr_pool_attr_t fmr_attr; 760 uint_t h_page_sz; 761 #endif 762 ASSERT(MUTEX_HELD(&ribstat->open_hca_lock)); 763 if (ribstat->hcas == NULL) 764 ribstat->hcas = kmem_zalloc(ribstat->hca_count * 765 sizeof (rib_hca_t), KM_SLEEP); 766 767 /* 768 * Open a hca and setup for RDMA 769 */ 770 for (i = 0; i < ribstat->hca_count; i++) { 771 ibt_status = ibt_open_hca(ribstat->ibt_clnt_hdl, 772 ribstat->hca_guids[i], 773 &ribstat->hcas[i].hca_hdl); 774 if (ibt_status != IBT_SUCCESS) { 775 cmn_err(CE_WARN, "open_hcas: ibt_open_hca (%d) " 776 "returned %d", i, ibt_status); 777 continue; 778 } 779 ribstat->hcas[i].hca_guid = ribstat->hca_guids[i]; 780 hca = &(ribstat->hcas[i]); 781 hca->ibt_clnt_hdl = ribstat->ibt_clnt_hdl; 782 hca->state = HCA_INITED; 783 784 /* 785 * query HCA info 786 */ 787 ibt_status = ibt_query_hca(hca->hca_hdl, &hca->hca_attrs); 788 if (ibt_status != IBT_SUCCESS) { 789 cmn_err(CE_WARN, "open_hcas: ibt_query_hca " 790 "returned %d (hca_guid 0x%llx)", 791 ibt_status, (longlong_t)ribstat->hca_guids[i]); 792 goto fail1; 793 } 794 795 /* 796 * One PD (Protection Domain) per HCA. 797 * A qp is allowed to access a memory region 798 * only when it's in the same PD as that of 799 * the memory region. 800 */ 801 ibt_status = ibt_alloc_pd(hca->hca_hdl, pd_flags, &hca->pd_hdl); 802 if (ibt_status != IBT_SUCCESS) { 803 cmn_err(CE_WARN, "open_hcas: ibt_alloc_pd " 804 "returned %d (hca_guid 0x%llx)", 805 ibt_status, (longlong_t)ribstat->hca_guids[i]); 806 goto fail1; 807 } 808 809 /* 810 * query HCA ports 811 */ 812 ibt_status = ibt_query_hca_ports(hca->hca_hdl, 813 0, &pinfop, &hca->hca_nports, &size); 814 if (ibt_status != IBT_SUCCESS) { 815 cmn_err(CE_WARN, "open_hcas: " 816 "ibt_query_hca_ports returned %d " 817 "(hca_guid 0x%llx)", 818 ibt_status, (longlong_t)hca->hca_guid); 819 goto fail2; 820 } 821 hca->hca_ports = pinfop; 822 hca->hca_pinfosz = size; 823 pinfop = NULL; 824 825 cq_size = DEF_CQ_SIZE; /* default cq size */ 826 /* 827 * Create 2 pairs of cq's (1 pair for client 828 * and the other pair for server) on this hca. 829 * If number of qp's gets too large, then several 830 * cq's will be needed. 831 */ 832 status = rib_create_cq(hca, cq_size, rib_svc_rcq_handler, 833 &hca->svc_rcq, ribstat); 834 if (status != RDMA_SUCCESS) { 835 goto fail3; 836 } 837 838 status = rib_create_cq(hca, cq_size, rib_svc_scq_handler, 839 &hca->svc_scq, ribstat); 840 if (status != RDMA_SUCCESS) { 841 goto fail3; 842 } 843 844 status = rib_create_cq(hca, cq_size, rib_clnt_rcq_handler, 845 &hca->clnt_rcq, ribstat); 846 if (status != RDMA_SUCCESS) { 847 goto fail3; 848 } 849 850 status = rib_create_cq(hca, cq_size, rib_clnt_scq_handler, 851 &hca->clnt_scq, ribstat); 852 if (status != RDMA_SUCCESS) { 853 goto fail3; 854 } 855 856 /* 857 * Create buffer pools. 
858 * Note rib_rbuf_create also allocates memory windows. 859 */ 860 hca->recv_pool = rib_rbufpool_create(hca, 861 RECV_BUFFER, MAX_BUFS); 862 if (hca->recv_pool == NULL) { 863 cmn_err(CE_WARN, "open_hcas: recv buf pool failed\n"); 864 goto fail3; 865 } 866 867 hca->send_pool = rib_rbufpool_create(hca, 868 SEND_BUFFER, MAX_BUFS); 869 if (hca->send_pool == NULL) { 870 cmn_err(CE_WARN, "open_hcas: send buf pool failed\n"); 871 rib_rbufpool_destroy(hca, RECV_BUFFER); 872 goto fail3; 873 } 874 #ifdef IB_FMR_SUP 875 /* Global FMR POOL */ 876 bzero(&fmr_attr, sizeof (ibt_fmr_pool_attr_t)); 877 878 h_page_sz = hca->hca_attrs.hca_page_sz * 1024; 879 880 fmr_attr.fmr_max_pages_per_fmr = 881 (IB_FMR_MAX_SIZE / h_page_sz) + 2; 882 fmr_attr.fmr_pool_size = MAX_BUFS * 2; 883 fmr_attr.fmr_dirty_watermark = IB_FMR_DIRTY_MARK; 884 fmr_attr.fmr_page_sz = h_page_sz; 885 fmr_attr.fmr_cache = B_FALSE; 886 fmr_attr.fmr_flags = IBT_MR_SLEEP | 887 IBT_MR_ENABLE_LOCAL_WRITE | 888 IBT_MR_ENABLE_REMOTE_READ | 889 IBT_MR_ENABLE_REMOTE_WRITE; 890 fmr_attr.fmr_func_hdlr = NULL; 891 892 if (rib_debug > 1) { 893 cmn_err(CE_NOTE, "open_hcas: ibt_create_fmr_pool:"); 894 cmn_err(CE_NOTE, "fmr_page_sz %d, fmr_pool_sz %d, " 895 "max_pages_per_fmr %d", fmr_attr.fmr_page_sz, 896 fmr_attr.fmr_pool_size, 897 fmr_attr.fmr_max_pages_per_fmr); 898 } 899 900 ibt_status = ibt_create_fmr_pool(hca->hca_hdl, hca->pd_hdl, 901 &fmr_attr, &hca->fmr_pool); 902 if (ibt_status != IBT_SUCCESS) { 903 cmn_err(CE_WARN, "open_hcas: Global FMR pool creation " 904 "failed: %d\n", ibt_status); 905 rib_rbufpool_destroy(hca, RECV_BUFFER); 906 rib_rbufpool_destroy(hca, SEND_BUFFER); 907 goto fail3; 908 } 909 #endif 910 #ifdef SERVER_REG_CACHE 911 cmn_err(CE_NOTE,"Registration Cache enabled\n"); 912 { 913 cache_avl_struct_t my_avl_node; 914 hca->server_side_cache = 915 kmem_cache_create("rib_server_side_cache", 916 sizeof (cache_avl_struct_t), 0, 917 NULL, 918 NULL, 919 rib_server_side_cache_reclaim, 920 hca, NULL, 0); 921 avl_create(&hca->avl_tree, 922 avl_compare, 923 sizeof(cache_avl_struct_t), 924 (uint_t)&my_avl_node.avl_link-(uint_t)&my_avl_node); 925 /* mutex_init(&hca->avl_lock, NULL, MUTEX_DEFAULT, NULL);*/ 926 rw_init(&hca->avl_rw_lock, NULL, RW_DRIVER, hca->iblock); 927 hca->avl_init = TRUE; 928 929 } 930 #endif 931 932 #if defined(ASYNC_CLIENT_DEREG) 933 rqueue.forw = rqueue.back = &rqueue; 934 mutex_init(&at_mutex, NULL, MUTEX_DEFAULT, NULL); 935 cv_init(&at_cond, NULL, CV_DEFAULT, NULL); 936 (void) thread_create(NULL, 0, async_dereg_thread, NULL, 0, &p0, 937 TS_RUN, minclsyspri); 938 #endif 939 /* 940 * Initialize the registered service list and 941 * the lock 942 */ 943 hca->service_list = NULL; 944 rw_init(&hca->service_list_lock, NULL, RW_DRIVER, hca->iblock); 945 946 mutex_init(&hca->cb_lock, NULL, MUTEX_DRIVER, hca->iblock); 947 cv_init(&hca->cb_cv, NULL, CV_DRIVER, NULL); 948 rw_init(&hca->cl_conn_list.conn_lock, NULL, RW_DRIVER, 949 hca->iblock); 950 rw_init(&hca->srv_conn_list.conn_lock, NULL, RW_DRIVER, 951 hca->iblock); 952 rw_init(&hca->state_lock, NULL, RW_DRIVER, hca->iblock); 953 mutex_init(&hca->inuse_lock, NULL, MUTEX_DRIVER, hca->iblock); 954 hca->inuse = TRUE; 955 /* 956 * XXX One hca only. Add multi-hca functionality if needed 957 * later. 
958 */ 959 ribstat->hca = hca; 960 ribstat->nhca_inited++; 961 ibt_free_portinfo(hca->hca_ports, hca->hca_pinfosz); 962 break; 963 964 fail3: 965 ibt_free_portinfo(hca->hca_ports, hca->hca_pinfosz); 966 fail2: 967 (void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl); 968 fail1: 969 (void) ibt_close_hca(hca->hca_hdl); 970 971 } 972 if (ribstat->hca != NULL) 973 return (RDMA_SUCCESS); 974 else 975 return (RDMA_FAILED); 976 } 977 978 /* 979 * Callback routines 980 */ 981 982 /* 983 * SCQ handlers 984 */ 985 /* ARGSUSED */ 986 static void 987 rib_clnt_scq_handler(ibt_cq_hdl_t cq_hdl, void *arg) 988 { 989 ibt_status_t ibt_status; 990 ibt_wc_t wc; 991 int i; 992 993 /* 994 * Re-enable cq notify here to avoid missing any 995 * completion queue notification. 996 */ 997 (void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION); 998 999 ibt_status = IBT_SUCCESS; 1000 while (ibt_status != IBT_CQ_EMPTY) { 1001 bzero(&wc, sizeof (wc)); 1002 ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL); 1003 if (ibt_status != IBT_SUCCESS) 1004 return; 1005 1006 /* 1007 * Got a send completion 1008 */ 1009 if (wc.wc_id != NULL) { /* XXX can it be otherwise ???? */ 1010 struct send_wid *wd = (struct send_wid *)(uintptr_t)wc.wc_id; 1011 CONN *conn = qptoc(wd->qp); 1012 1013 mutex_enter(&wd->sendwait_lock); 1014 switch (wc.wc_status) { 1015 case IBT_WC_SUCCESS: 1016 wd->status = RDMA_SUCCESS; 1017 break; 1018 case IBT_WC_WR_FLUSHED_ERR: 1019 wd->status = RDMA_FAILED; 1020 break; 1021 default: 1022 /* 1023 * RC Send Q Error Code Local state Remote State 1024 * ==================== =========== ============ 1025 * IBT_WC_BAD_RESPONSE_ERR ERROR None 1026 * IBT_WC_LOCAL_LEN_ERR ERROR None 1027 * IBT_WC_LOCAL_CHAN_OP_ERR ERROR None 1028 * IBT_WC_LOCAL_PROTECT_ERR ERROR None 1029 * IBT_WC_MEM_WIN_BIND_ERR ERROR None 1030 * IBT_WC_REMOTE_INVALID_REQ_ERR ERROR ERROR 1031 * IBT_WC_REMOTE_ACCESS_ERR ERROR ERROR 1032 * IBT_WC_REMOTE_OP_ERR ERROR ERROR 1033 * IBT_WC_RNR_NAK_TIMEOUT_ERR ERROR None 1034 * IBT_WC_TRANS_TIMEOUT_ERR ERROR None 1035 * IBT_WC_WR_FLUSHED_ERR None None 1036 */ 1037 #ifdef DEBUG 1038 if (rib_debug > 1) { 1039 if (wc.wc_status != IBT_WC_SUCCESS) { 1040 cmn_err(CE_NOTE, "rib_clnt_scq_handler: " 1041 "WR completed in error, wc.wc_status:%d, " 1042 "wc_id:%llx\n", wc.wc_status, (longlong_t)wc.wc_id); 1043 } 1044 } 1045 #endif 1046 /* 1047 * Channel in error state. Set connection to 1048 * ERROR and cleanup will happen either from 1049 * conn_release or from rib_conn_get 1050 */ 1051 wd->status = RDMA_FAILED; 1052 mutex_enter(&conn->c_lock); 1053 if (conn->c_state != C_DISCONN_PEND) 1054 conn->c_state = C_ERROR; 1055 mutex_exit(&conn->c_lock); 1056 break; 1057 } 1058 if (wd->cv_sig == 1) { 1059 /* 1060 * Notify poster 1061 */ 1062 cv_signal(&wd->wait_cv); 1063 mutex_exit(&wd->sendwait_lock); 1064 } else { 1065 /* 1066 * Poster not waiting for notification. 
1067 * Free the send buffers and send_wid 1068 */ 1069 for (i = 0; i < wd->nsbufs; i++) { 1070 rib_rbuf_free(qptoc(wd->qp), SEND_BUFFER, 1071 (void *)(uintptr_t)wd->sbufaddr[i]); 1072 } 1073 mutex_exit(&wd->sendwait_lock); 1074 (void) rib_free_sendwait(wd); 1075 } 1076 } 1077 } 1078 } 1079 1080 #if defined (CLNT_INTERRUPT_COAL) 1081 static void 1082 rib_scq_free(caddr_t widd) 1083 { 1084 struct send_wid *wd = (struct send_wid *)widd; 1085 ibt_status_t ibt_status; 1086 ibt_wc_t wc; 1087 int i; 1088 CONN *conn = qptoc(wd->qp); 1089 1090 wc.wc_status = RDMA_SUCCESS; 1091 mutex_enter(&wd->sendwait_lock); 1092 switch (wc.wc_status) { 1093 case IBT_WC_SUCCESS: 1094 wd->status = RDMA_SUCCESS; 1095 break; 1096 case IBT_WC_WR_FLUSHED_ERR: 1097 wd->status = RDMA_FAILED; 1098 break; 1099 default: 1100 /* 1101 * RC Send Q Error Code Local state Remote State 1102 * ==================== =========== ============ 1103 * IBT_WC_BAD_RESPONSE_ERR ERROR None 1104 * IBT_WC_LOCAL_LEN_ERR ERROR None 1105 * IBT_WC_LOCAL_CHAN_OP_ERR ERROR None 1106 * IBT_WC_LOCAL_PROTECT_ERR ERROR None 1107 * IBT_WC_MEM_WIN_BIND_ERR ERROR None 1108 * IBT_WC_REMOTE_INVALID_REQ_ERR ERROR ERROR 1109 * IBT_WC_REMOTE_ACCESS_ERR ERROR ERROR 1110 * IBT_WC_REMOTE_OP_ERR ERROR ERROR 1111 * IBT_WC_RNR_NAK_TIMEOUT_ERR ERROR None 1112 * IBT_WC_TRANS_TIMEOUT_ERR ERROR None 1113 * IBT_WC_WR_FLUSHED_ERR None None 1114 */ 1115 #ifdef DEBUG 1116 if (rib_debug > 1) { 1117 if (wc.wc_status != IBT_WC_SUCCESS) { 1118 cmn_err(CE_NOTE, "rib_clnt_scq_handler: " 1119 "WR completed in error, wc.wc_status:%d, " 1120 "wc_id:%llx\n", wc.wc_status, (longlong_t)wc.wc_id); 1121 } 1122 } 1123 #endif 1124 /* 1125 * Channel in error state. Set connection to 1126 * ERROR and cleanup will happen either from 1127 * conn_release or from rib_conn_get 1128 */ 1129 wd->status = RDMA_FAILED; 1130 mutex_enter(&conn->c_lock); 1131 if (conn->c_state != C_DISCONN_PEND) 1132 conn->c_state = C_ERROR; 1133 mutex_exit(&conn->c_lock); 1134 break; 1135 } 1136 if (wd->cv_sig == 1) { 1137 /* 1138 * Notify poster 1139 */ 1140 cmn_err(CE_NOTE,"Some error \n"); 1141 cv_signal(&wd->wait_cv); 1142 mutex_exit(&wd->sendwait_lock); 1143 } else { 1144 /* 1145 * Poster not waiting for notification. 1146 * Free the send buffers and send_wid 1147 */ 1148 for (i = 0; i < wd->nsbufs; i++) { 1149 rib_rbuf_free(qptoc(wd->qp), SEND_BUFFER, 1150 (void *)(uintptr_t)wd->sbufaddr[i]); 1151 } 1152 mutex_exit(&wd->sendwait_lock); 1153 (void) rib_free_sendwait(wd); 1154 } 1155 } 1156 #endif 1157 1158 /* ARGSUSED */ 1159 static void 1160 rib_svc_scq_handler(ibt_cq_hdl_t cq_hdl, void *arg) 1161 { 1162 ibt_status_t ibt_status; 1163 ibt_wc_t wc; 1164 int i; 1165 1166 /* 1167 * Re-enable cq notify here to avoid missing any 1168 * completion queue notification. 1169 */ 1170 (void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION); 1171 1172 ibt_status = IBT_SUCCESS; 1173 while (ibt_status != IBT_CQ_EMPTY) { 1174 bzero(&wc, sizeof (wc)); 1175 ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL); 1176 if (ibt_status != IBT_SUCCESS) 1177 return; 1178 1179 /* 1180 * Got a send completion 1181 */ 1182 #ifdef DEBUG 1183 if (rib_debug > 1 && wc.wc_status != IBT_WC_SUCCESS) { 1184 cmn_err(CE_NOTE, "rib_svc_scq_handler: WR completed in error " 1185 "wc.wc_status:%d, wc_id:%llX", 1186 wc.wc_status, (longlong_t)wc.wc_id); 1187 } 1188 #endif 1189 if (wc.wc_id != NULL) { /* XXX NULL possible ???? 
*/ 1190 struct send_wid *wd = (struct send_wid *)(uintptr_t)wc.wc_id; 1191 #ifdef ASYNC_SERVER_DEREG 1192 if(wd->c1){ 1193 (void) clist_deregister1((CONN *)wd->c, (struct clist *)wd->c1, TRUE); 1194 #ifdef SERVER_REG_CACHE 1195 RDMA_FREE_SERVER_CACHE_BUF((CONN *)wd->c, (rib_lrc_entry_t *)(((struct clist *)wd->c1)->long_reply_buf)); 1196 #else 1197 if(wd->c1 && wd->l1) 1198 kmem_free((void *) (wd->c1)->c_saddr, wd->l1); 1199 #endif 1200 kmem_free((void *)(wd->c1), wd->wl * sizeof(struct clist)); 1201 } 1202 if(wd->c2){ 1203 (void) clist_deregister1((CONN *)wd->c, (struct clist *)wd->c2, TRUE); 1204 #ifdef SERVER_REG_CACHE 1205 RDMA_FREE_SERVER_CACHE_BUF((CONN *)wd->c, (rib_lrc_entry_t *)(((struct clist *)wd->c2)->long_reply_buf)); 1206 #else 1207 if(wd->l2) 1208 kmem_free((void *) (wd->c2)->c_saddr, wd->l2); 1209 #endif 1210 kmem_free((void *)(wd->c2), wd->rl * sizeof(struct clist)); 1211 } 1212 #endif 1213 mutex_enter(&wd->sendwait_lock); 1214 if (wd->cv_sig == 1) { 1215 /* 1216 * Update completion status and notify poster 1217 */ 1218 if (wc.wc_status == IBT_WC_SUCCESS) 1219 wd->status = RDMA_SUCCESS; 1220 else 1221 wd->status = RDMA_FAILED; 1222 cv_signal(&wd->wait_cv); 1223 mutex_exit(&wd->sendwait_lock); 1224 } else { 1225 /* 1226 * Poster not waiting for notification. 1227 * Free the send buffers and send_wid 1228 */ 1229 for (i = 0; i < wd->nsbufs; i++) { 1230 rib_rbuf_free(qptoc(wd->qp), SEND_BUFFER, 1231 (void *)(uintptr_t)wd->sbufaddr[i]); 1232 } 1233 mutex_exit(&wd->sendwait_lock); 1234 (void) rib_free_sendwait(wd); 1235 } 1236 } 1237 } 1238 } 1239 1240 /* 1241 * RCQ handler 1242 */ 1243 /* ARGSUSED */ 1244 static void 1245 rib_clnt_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg) 1246 { 1247 rib_qp_t *qp; 1248 ibt_status_t ibt_status; 1249 ibt_wc_t wc; 1250 struct recv_wid *rwid; 1251 #if defined(CLNT_POLL_CQ) 1252 uint32_t count = 0; 1253 #endif 1254 1255 /* 1256 * Re-enable cq notify here to avoid missing any 1257 * completion queue notification. 1258 */ 1259 #if !defined(CLNT_POLL_CQ) 1260 (void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION); 1261 #endif 1262 1263 ibt_status = IBT_SUCCESS; 1264 while (ibt_status != IBT_CQ_EMPTY) { 1265 #if defined(CLNT_POLL_CQ) 1266 poll_cq_again: 1267 #endif 1268 bzero(&wc, sizeof (wc)); 1269 ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL); 1270 #if defined(CLNT_POLL_CQ) 1271 if (ibt_status == IBT_CQ_EMPTY){ 1272 count ++; 1273 if(count == max_poll_count){ 1274 (void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION); 1275 return; 1276 } 1277 goto poll_cq_again; 1278 } 1279 #endif 1280 if (ibt_status != IBT_SUCCESS) 1281 #if defined(CLNT_POLL_CQ) 1282 { 1283 (void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION); 1284 #endif 1285 return; 1286 #if defined(CLNT_POLL_CQ) 1287 } 1288 count = 0; 1289 #endif 1290 rwid = (struct recv_wid *)(uintptr_t)wc.wc_id; 1291 qp = rwid->qp; 1292 if (wc.wc_status == IBT_WC_SUCCESS) { 1293 XDR inxdrs, *xdrs; 1294 uint_t xid, vers, op, find_xid = 0; 1295 struct reply *r; 1296 CONN *conn = qptoc(qp); 1297 uint32_t rdma_credit = 0; 1298 1299 xdrs = &inxdrs; 1300 xdrmem_create(xdrs, (caddr_t)(uintptr_t)rwid->addr, 1301 wc.wc_bytes_xfer, XDR_DECODE); 1302 /* 1303 * Treat xid as opaque (xid is the first entity 1304 * in the rpc rdma message). 1305 */ 1306 xid = *(uint32_t *)(uintptr_t)rwid->addr; 1307 /* Skip xid and set the xdr position accordingly. 
*/ 1308 XDR_SETPOS(xdrs, sizeof (uint32_t)); 1309 (void) xdr_u_int(xdrs, &vers); 1310 (void) xdr_u_int(xdrs, &rdma_credit); 1311 (void) xdr_u_int(xdrs, &op); 1312 XDR_DESTROY(xdrs); 1313 if (vers != RPCRDMA_VERS) { 1314 /* 1315 * Invalid RPC/RDMA version. Cannot interoperate. 1316 * Set connection to ERROR state and bail out. 1317 */ 1318 mutex_enter(&conn->c_lock); 1319 if (conn->c_state != C_DISCONN_PEND) 1320 conn->c_state = C_ERROR; 1321 mutex_exit(&conn->c_lock); 1322 rib_rbuf_free(conn, RECV_BUFFER, 1323 (void *)(uintptr_t)rwid->addr); 1324 rib_free_wid(rwid); 1325 continue; 1326 } 1327 1328 mutex_enter(&qp->replylist_lock); 1329 for (r = qp->replylist; r != NULL; r = r->next) { 1330 if (r->xid == xid) { 1331 find_xid = 1; 1332 switch (op) { 1333 case RDMA_MSG: 1334 case RDMA_NOMSG: 1335 case RDMA_MSGP: 1336 r->status = RDMA_SUCCESS; 1337 r->vaddr_cq = rwid->addr; 1338 r->bytes_xfer = wc.wc_bytes_xfer; 1339 cv_signal(&r->wait_cv); 1340 break; 1341 default: 1342 rib_rbuf_free(qptoc(qp), RECV_BUFFER, 1343 (void *)(uintptr_t)rwid->addr); 1344 break; 1345 } 1346 break; 1347 } 1348 } 1349 mutex_exit(&qp->replylist_lock); 1350 if (find_xid == 0) { 1351 /* RPC caller not waiting for reply */ 1352 #ifdef DEBUG 1353 if (rib_debug) { 1354 cmn_err(CE_NOTE, "rib_clnt_rcq_handler: " 1355 "NO matching xid %u!\n", xid); 1356 } 1357 #endif 1358 rib_rbuf_free(qptoc(qp), RECV_BUFFER, 1359 (void *)(uintptr_t)rwid->addr); 1360 } 1361 } else if (wc.wc_status == IBT_WC_WR_FLUSHED_ERR) { 1362 CONN *conn = qptoc(qp); 1363 1364 /* 1365 * Connection being flushed. Just free 1366 * the posted buffer 1367 */ 1368 rib_rbuf_free(conn, RECV_BUFFER, 1369 (void *)(uintptr_t)rwid->addr); 1370 } else { 1371 CONN *conn = qptoc(qp); 1372 /* 1373 * RC Recv Q Error Code Local state Remote State 1374 * ==================== =========== ============ 1375 * IBT_WC_LOCAL_ACCESS_ERR ERROR ERROR when NAK recvd 1376 * IBT_WC_LOCAL_LEN_ERR ERROR ERROR when NAK recvd 1377 * IBT_WC_LOCAL_PROTECT_ERR ERROR ERROR when NAK recvd 1378 * IBT_WC_LOCAL_CHAN_OP_ERR ERROR ERROR when NAK recvd 1379 * IBT_WC_REMOTE_INVALID_REQ_ERR ERROR ERROR when NAK recvd 1380 * IBT_WC_WR_FLUSHED_ERR None None 1381 */ 1382 /* 1383 * Channel in error state. Set connection 1384 * in ERROR state. 1385 */ 1386 mutex_enter(&conn->c_lock); 1387 if (conn->c_state != C_DISCONN_PEND) 1388 conn->c_state = C_ERROR; 1389 mutex_exit(&conn->c_lock); 1390 rib_rbuf_free(conn, RECV_BUFFER, 1391 (void *)(uintptr_t)rwid->addr); 1392 } 1393 rib_free_wid(rwid); 1394 } 1395 } 1396 1397 /* Server side */ 1398 /* ARGSUSED */ 1399 static void 1400 rib_svc_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg) 1401 { 1402 struct recv_data *rd; 1403 rib_qp_t *qp; 1404 ibt_status_t ibt_status; 1405 ibt_wc_t wc; 1406 struct svc_recv *s_recvp; 1407 CONN *conn; 1408 mblk_t *mp; 1409 1410 /* 1411 * Re-enable cq notify here to avoid missing any 1412 * completion queue notification. 
	 */
	(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);

	ibt_status = IBT_SUCCESS;
	while (ibt_status != IBT_CQ_EMPTY) {
		bzero(&wc, sizeof (wc));
		ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
		if (ibt_status != IBT_SUCCESS)
			return;

		s_recvp = (struct svc_recv *)(uintptr_t)wc.wc_id;
		qp = s_recvp->qp;
		conn = qptoc(qp);
		mutex_enter(&qp->posted_rbufs_lock);
		qp->n_posted_rbufs--;
#if defined(MEASURE_POOL_DEPTH)
		rib_posted_rbufs(preposted_rbufs - qp->n_posted_rbufs);
#endif
		if (qp->n_posted_rbufs == 0)
			cv_signal(&qp->posted_rbufs_cv);
		mutex_exit(&qp->posted_rbufs_lock);

		if (wc.wc_status == IBT_WC_SUCCESS) {
			XDR	inxdrs, *xdrs;
			uint_t	xid, vers, op;
			uint32_t rdma_credit;

			xdrs = &inxdrs;
			/* s_recvp->vaddr stores data */
			xdrmem_create(xdrs, (caddr_t)(uintptr_t)s_recvp->vaddr,
			    wc.wc_bytes_xfer, XDR_DECODE);

			/*
			 * Treat xid as opaque (xid is the first entity
			 * in the rpc rdma message).
			 */
			xid = *(uint32_t *)(uintptr_t)s_recvp->vaddr;
			/* Skip xid and set the xdr position accordingly. */
			XDR_SETPOS(xdrs, sizeof (uint32_t));
			if (!xdr_u_int(xdrs, &vers) ||
			    !xdr_u_int(xdrs, &rdma_credit) ||
			    !xdr_u_int(xdrs, &op)) {
				rib_rbuf_free(conn, RECV_BUFFER,
				    (void *)(uintptr_t)s_recvp->vaddr);
				XDR_DESTROY(xdrs);
#ifdef DEBUG
				cmn_err(CE_NOTE, "rib_svc_rcq_handler: "
				    "xdr_u_int failed for qp %p, wc_id=%llx",
				    (void *)qp, (longlong_t)wc.wc_id);
#endif
				(void) rib_free_svc_recv(s_recvp);
				continue;
			}
			XDR_DESTROY(xdrs);

			if (vers != RPCRDMA_VERS) {
				/*
				 * Invalid RPC/RDMA version. Drop rpc rdma message.
				 */
				rib_rbuf_free(conn, RECV_BUFFER,
				    (void *)(uintptr_t)s_recvp->vaddr);
				(void) rib_free_svc_recv(s_recvp);
				continue;
			}
			/*
			 * Is this for RDMA_DONE?
			 */
			if (op == RDMA_DONE) {
				rib_rbuf_free(conn, RECV_BUFFER,
				    (void *)(uintptr_t)s_recvp->vaddr);
				/*
				 * Wake up the thread waiting on
				 * a RDMA_DONE for xid
				 */
				mutex_enter(&qp->rdlist_lock);
				rdma_done_notify(qp, xid);
				mutex_exit(&qp->rdlist_lock);
				(void) rib_free_svc_recv(s_recvp);
				continue;
			}

			mutex_enter(&plugin_state_lock);
			if (plugin_state == ACCEPT) {
				while ((mp = allocb(sizeof (*rd), BPRI_LO)) == NULL)
					(void) strwaitbuf(sizeof (*rd), BPRI_LO);
				/*
				 * Plugin is in accept state, hence the master
				 * transport queue for this is still accepting
				 * requests. Hence we can call svc_queuereq to
				 * queue this received msg.
				 */
				rd = (struct recv_data *)mp->b_rptr;
				rd->conn = conn;
				rd->rpcmsg.addr = (caddr_t)(uintptr_t)s_recvp->vaddr;
				rd->rpcmsg.type = RECV_BUFFER;
				rd->rpcmsg.len = wc.wc_bytes_xfer;
				rd->status = wc.wc_status;
				mutex_enter(&conn->c_lock);
				conn->c_ref++;
				mutex_exit(&conn->c_lock);
				mp->b_wptr += sizeof (*rd);
				svc_queuereq((queue_t *)rib_stat->q, mp);
				mutex_exit(&plugin_state_lock);
			} else {
				/*
				 * The master transport for this is going
				 * away and the queue is not accepting any more
				 * requests for krpc, so don't do anything, just
				 * free the msg.
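				 * The receive buffer is simply returned to
				 * the recv buffer pool below.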
1522 */ 1523 mutex_exit(&plugin_state_lock); 1524 rib_rbuf_free(conn, RECV_BUFFER, 1525 (void *)(uintptr_t)s_recvp->vaddr); 1526 } 1527 } else { 1528 rib_rbuf_free(conn, RECV_BUFFER, 1529 (void *)(uintptr_t)s_recvp->vaddr); 1530 } 1531 (void) rib_free_svc_recv(s_recvp); 1532 } 1533 } 1534 1535 /* 1536 * Handles DR event of IBT_HCA_DETACH_EVENT. 1537 */ 1538 /* ARGSUSED */ 1539 static void 1540 rib_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl, 1541 ibt_async_code_t code, ibt_async_event_t *event) 1542 { 1543 1544 switch (code) { 1545 case IBT_HCA_ATTACH_EVENT: 1546 /* ignore */ 1547 break; 1548 case IBT_HCA_DETACH_EVENT: 1549 { 1550 ASSERT(rib_stat->hca->hca_hdl == hca_hdl); 1551 rib_detach_hca(rib_stat->hca); 1552 #ifdef DEBUG 1553 cmn_err(CE_NOTE, "rib_async_handler(): HCA being detached!\n"); 1554 #endif 1555 break; 1556 } 1557 #ifdef DEBUG 1558 case IBT_EVENT_PATH_MIGRATED: 1559 cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_PATH_MIGRATED\n"); 1560 break; 1561 case IBT_EVENT_SQD: 1562 cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_SQD\n"); 1563 break; 1564 case IBT_EVENT_COM_EST: 1565 cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_COM_EST\n"); 1566 break; 1567 case IBT_ERROR_CATASTROPHIC_CHAN: 1568 cmn_err(CE_NOTE, "rib_async_handler(): IBT_ERROR_CATASTROPHIC_CHAN\n"); 1569 break; 1570 case IBT_ERROR_INVALID_REQUEST_CHAN: 1571 cmn_err(CE_NOTE, "rib_async_handler(): " 1572 "IBT_ERROR_INVALID_REQUEST_CHAN\n"); 1573 break; 1574 case IBT_ERROR_ACCESS_VIOLATION_CHAN: 1575 cmn_err(CE_NOTE, "rib_async_handler(): " 1576 "IBT_ERROR_ACCESS_VIOLATION_CHAN\n"); 1577 break; 1578 case IBT_ERROR_PATH_MIGRATE_REQ: 1579 cmn_err(CE_NOTE, "rib_async_handler(): IBT_ERROR_PATH_MIGRATE_REQ\n"); 1580 break; 1581 case IBT_ERROR_CQ: 1582 cmn_err(CE_NOTE, "rib_async_handler(): IBT_ERROR_CQ\n"); 1583 break; 1584 case IBT_ERROR_PORT_DOWN: 1585 cmn_err(CE_NOTE, "rib_async_handler(): IBT_ERROR_PORT_DOWN\n"); 1586 break; 1587 case IBT_EVENT_PORT_UP: 1588 cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_PORT_UP\n"); 1589 break; 1590 case IBT_ASYNC_OPAQUE1: 1591 cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE1\n"); 1592 break; 1593 case IBT_ASYNC_OPAQUE2: 1594 cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE2\n"); 1595 break; 1596 case IBT_ASYNC_OPAQUE3: 1597 cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE3\n"); 1598 break; 1599 case IBT_ASYNC_OPAQUE4: 1600 cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE4\n"); 1601 break; 1602 #endif 1603 default: 1604 break; 1605 } 1606 } 1607 1608 /* 1609 * Client's reachable function. 
1610 */ 1611 static rdma_stat 1612 rib_reachable(int addr_type, struct netbuf *raddr, void **handle) 1613 { 1614 rib_hca_t *hca; 1615 rdma_stat status; 1616 1617 /* 1618 * First check if a hca is still attached 1619 */ 1620 *handle = NULL; 1621 rw_enter(&rib_stat->hca->state_lock, RW_READER); 1622 if (rib_stat->hca->state != HCA_INITED) { 1623 rw_exit(&rib_stat->hca->state_lock); 1624 return (RDMA_FAILED); 1625 } 1626 status = rib_ping_srv(addr_type, raddr, &hca); 1627 rw_exit(&rib_stat->hca->state_lock); 1628 1629 if (status == RDMA_SUCCESS) { 1630 *handle = (void *)hca; 1631 /* 1632 * Register the Address translation service 1633 */ 1634 mutex_enter(&rib_stat->open_hca_lock); 1635 if (ats_running == 0) { 1636 if (rib_register_ats(rib_stat->hca) 1637 == RDMA_SUCCESS) { 1638 ats_running = 1; 1639 mutex_exit(&rib_stat->open_hca_lock); 1640 return (RDMA_SUCCESS); 1641 } else { 1642 mutex_exit(&rib_stat->open_hca_lock); 1643 return (RDMA_FAILED); 1644 } 1645 } else { 1646 mutex_exit(&rib_stat->open_hca_lock); 1647 return (RDMA_SUCCESS); 1648 } 1649 } else { 1650 *handle = NULL; 1651 if (rib_debug > 2) 1652 cmn_err(CE_WARN, "rib_reachable(): ping_srv failed.\n"); 1653 return (RDMA_FAILED); 1654 } 1655 } 1656 1657 /* Client side qp creation */ 1658 static rdma_stat 1659 rib_clnt_create_chan(rib_hca_t *hca, struct netbuf *raddr, rib_qp_t **qp) 1660 { 1661 rib_qp_t *kqp = NULL; 1662 CONN *conn; 1663 rdma_clnt_cred_ctrl_t *cc_info; 1664 1665 ASSERT(qp != NULL); 1666 *qp = NULL; 1667 1668 kqp = kmem_zalloc(sizeof (rib_qp_t), KM_SLEEP); 1669 conn = qptoc(kqp); 1670 kqp->hca = hca; 1671 kqp->rdmaconn.c_rdmamod = &rib_mod; 1672 kqp->rdmaconn.c_private = (caddr_t)kqp; 1673 1674 kqp->mode = RIB_CLIENT; 1675 kqp->chan_flags = IBT_BLOCKING; 1676 conn->c_raddr.buf = kmem_alloc(raddr->len, KM_SLEEP); 1677 bcopy(raddr->buf, conn->c_raddr.buf, raddr->len); 1678 conn->c_raddr.len = conn->c_raddr.maxlen = raddr->len; 1679 1680 /* 1681 * Initialize 1682 */ 1683 cv_init(&kqp->cb_conn_cv, NULL, CV_DEFAULT, NULL); 1684 cv_init(&kqp->posted_rbufs_cv, NULL, CV_DEFAULT, NULL); 1685 mutex_init(&kqp->posted_rbufs_lock, NULL, MUTEX_DRIVER, hca->iblock); 1686 mutex_init(&kqp->replylist_lock, NULL, MUTEX_DRIVER, hca->iblock); 1687 mutex_init(&kqp->rdlist_lock, NULL, MUTEX_DEFAULT, hca->iblock); 1688 mutex_init(&kqp->cb_lock, NULL, MUTEX_DRIVER, hca->iblock); 1689 cv_init(&kqp->rdmaconn.c_cv, NULL, CV_DEFAULT, NULL); 1690 mutex_init(&kqp->rdmaconn.c_lock, NULL, MUTEX_DRIVER, hca->iblock); 1691 #if defined (CLNT_INTERRUPT_COAL) 1692 kqp->rdmaconn.c_count = 0; 1693 conn->c_count = 0; 1694 bzero(&kqp->wd, sizeof(struct send_wid)); 1695 kqp->wd.forw = kqp->wd.back = &kqp->wd; 1696 #endif 1697 /* 1698 * Initialize the client credit control 1699 * portion of the rdmaconn struct. 
1700 */ 1701 kqp->rdmaconn.c_cc_type = RDMA_CC_CLNT; 1702 cc_info = &kqp->rdmaconn.rdma_conn_cred_ctrl_u.c_clnt_cc; 1703 cc_info->clnt_cc_granted_ops = 0; 1704 cc_info->clnt_cc_in_flight_ops = 0; 1705 cv_init(&cc_info->clnt_cc_cv, NULL, CV_DEFAULT, NULL); 1706 1707 *qp = kqp; 1708 return (RDMA_SUCCESS); 1709 } 1710 1711 /* Server side qp creation */ 1712 static rdma_stat 1713 rib_svc_create_chan(rib_hca_t *hca, caddr_t q, uint8_t port, rib_qp_t **qp) 1714 { 1715 rib_qp_t *kqp = NULL; 1716 ibt_chan_sizes_t chan_sizes; 1717 ibt_rc_chan_alloc_args_t qp_attr; 1718 ibt_status_t ibt_status; 1719 rdma_srv_cred_ctrl_t *cc_info; 1720 1721 ASSERT(qp != NULL); 1722 *qp = NULL; 1723 1724 kqp = kmem_zalloc(sizeof (rib_qp_t), KM_SLEEP); 1725 kqp->hca = hca; 1726 kqp->port_num = port; 1727 kqp->rdmaconn.c_rdmamod = &rib_mod; 1728 kqp->rdmaconn.c_private = (caddr_t)kqp; 1729 1730 /* 1731 * Create the qp handle 1732 */ 1733 bzero(&qp_attr, sizeof (ibt_rc_chan_alloc_args_t)); 1734 qp_attr.rc_scq = hca->svc_scq->rib_cq_hdl; 1735 qp_attr.rc_rcq = hca->svc_rcq->rib_cq_hdl; 1736 qp_attr.rc_pd = hca->pd_hdl; 1737 qp_attr.rc_hca_port_num = port; 1738 qp_attr.rc_sizes.cs_sq_sgl = DSEG_MAX; 1739 qp_attr.rc_sizes.cs_rq_sgl = RQ_DSEG_MAX; 1740 qp_attr.rc_sizes.cs_sq = DEF_SQ_SIZE; 1741 qp_attr.rc_sizes.cs_rq = DEF_RQ_SIZE; 1742 qp_attr.rc_clone_chan = NULL; 1743 qp_attr.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR; 1744 qp_attr.rc_flags = IBT_WR_SIGNALED; 1745 1746 rw_enter(&hca->state_lock, RW_READER); 1747 if (hca->state != HCA_DETACHED) { 1748 ibt_status = ibt_alloc_rc_channel(hca->hca_hdl, 1749 IBT_ACHAN_NO_FLAGS, &qp_attr, &kqp->qp_hdl, 1750 &chan_sizes); 1751 } else { 1752 rw_exit(&hca->state_lock); 1753 goto fail; 1754 } 1755 rw_exit(&hca->state_lock); 1756 1757 if (ibt_status != IBT_SUCCESS) { 1758 cmn_err(CE_WARN, "rib_svc_create_chan: " 1759 "ibt_alloc_rc_channel failed, ibt_status=%d.", 1760 ibt_status); 1761 goto fail; 1762 } 1763 1764 kqp->mode = RIB_SERVER; 1765 kqp->chan_flags = IBT_BLOCKING; 1766 kqp->q = q; /* server ONLY */ 1767 1768 cv_init(&kqp->cb_conn_cv, NULL, CV_DEFAULT, NULL); 1769 cv_init(&kqp->posted_rbufs_cv, NULL, CV_DEFAULT, NULL); 1770 mutex_init(&kqp->replylist_lock, NULL, MUTEX_DEFAULT, hca->iblock); 1771 mutex_init(&kqp->posted_rbufs_lock, NULL, MUTEX_DRIVER, hca->iblock); 1772 mutex_init(&kqp->rdlist_lock, NULL, MUTEX_DEFAULT, hca->iblock); 1773 mutex_init(&kqp->cb_lock, NULL, MUTEX_DRIVER, hca->iblock); 1774 cv_init(&kqp->rdmaconn.c_cv, NULL, CV_DEFAULT, NULL); 1775 mutex_init(&kqp->rdmaconn.c_lock, NULL, MUTEX_DRIVER, hca->iblock); 1776 /* 1777 * Set the private data area to qp to be used in callbacks 1778 */ 1779 ibt_set_chan_private(kqp->qp_hdl, (void *)kqp); 1780 kqp->rdmaconn.c_state = C_CONNECTED; 1781 1782 /* 1783 * Initialize the server credit control 1784 * portion of the rdmaconn struct. 
	 */
	kqp->rdmaconn.c_cc_type = RDMA_CC_SRV;
	cc_info = &kqp->rdmaconn.rdma_conn_cred_ctrl_u.c_srv_cc;
	cc_info->srv_cc_buffers_granted = preposted_rbufs;
	cc_info->srv_cc_cur_buffers_used = 0;
	cc_info->srv_cc_posted = preposted_rbufs;

	*qp = kqp;

	num_clients++;
	return (RDMA_SUCCESS);
fail:
	if (kqp)
		kmem_free(kqp, sizeof (rib_qp_t));

	return (RDMA_FAILED);
}

void
rib_dump_pathrec(ibt_path_info_t *path_rec)
{
	ib_pkey_t	pkey;

	if (rib_debug > 1) {
		cmn_err(CE_NOTE, "Path Record:\n");

		cmn_err(CE_NOTE, "Source HCA GUID = %llx\n",
		    (longlong_t)path_rec->pi_hca_guid);
		cmn_err(CE_NOTE, "Dest Service ID = %llx\n",
		    (longlong_t)path_rec->pi_sid);
		cmn_err(CE_NOTE, "Port Num = %02d\n",
		    path_rec->pi_prim_cep_path.cep_hca_port_num);
		cmn_err(CE_NOTE, "P_Key Index = %04d\n",
		    path_rec->pi_prim_cep_path.cep_pkey_ix);

		(void) ibt_index2pkey_byguid(path_rec->pi_hca_guid,
		    path_rec->pi_prim_cep_path.cep_hca_port_num,
		    path_rec->pi_prim_cep_path.cep_pkey_ix, &pkey);
		cmn_err(CE_NOTE, "P_Key = 0x%x\n", pkey);


		cmn_err(CE_NOTE, "SGID: = %llx:%llx\n",
		    (longlong_t)
		    path_rec->pi_prim_cep_path.cep_adds_vect.av_sgid.gid_prefix,
		    (longlong_t)
		    path_rec->pi_prim_cep_path.cep_adds_vect.av_sgid.gid_guid);

		cmn_err(CE_NOTE, "DGID: = %llx:%llx\n",
		    (longlong_t)
		    path_rec->pi_prim_cep_path.cep_adds_vect.av_dgid.gid_prefix,
		    (longlong_t)
		    path_rec->pi_prim_cep_path.cep_adds_vect.av_dgid.gid_guid);

		cmn_err(CE_NOTE, "Path Rate = %02x\n",
		    path_rec->pi_prim_cep_path.cep_adds_vect.av_srate);
		cmn_err(CE_NOTE, "SL = %02x\n",
		    path_rec->pi_prim_cep_path.cep_adds_vect.av_srvl);
		cmn_err(CE_NOTE, "Prim Packet LT = %02x\n",
		    path_rec->pi_prim_pkt_lt);
		cmn_err(CE_NOTE, "Path MTU = %02x\n",
		    path_rec->pi_path_mtu);
	}
}

/* ARGSUSED */
ibt_cm_status_t
rib_clnt_cm_handler(void *clnt_hdl, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *priv_data,
    ibt_priv_data_len_t len)
{
	rpcib_state_t *ribstat;
	rib_hca_t *hca;

	ribstat = (rpcib_state_t *)clnt_hdl;
	hca = (rib_hca_t *)ribstat->hca;

	switch (event->cm_type) {

	/* got a connection close event */
	case IBT_CM_EVENT_CONN_CLOSED:
	{
		CONN	*conn;
		rib_qp_t *qp;

		/* check reason why connection was closed */
		switch (event->cm_event.closed) {
		case IBT_CM_CLOSED_DREP_RCVD:
		case IBT_CM_CLOSED_DREQ_TIMEOUT:
		case IBT_CM_CLOSED_DUP:
		case IBT_CM_CLOSED_ABORT:
		case IBT_CM_CLOSED_ALREADY:
			/*
			 * These cases indicate the local end initiated
			 * the closing of the channel. Nothing to do here.
			 */
			break;
		default:
			/*
			 * Reason for CONN_CLOSED event must be one of
			 * IBT_CM_CLOSED_DREQ_RCVD or IBT_CM_CLOSED_REJ_RCVD
			 * or IBT_CM_CLOSED_STALE. These indicate cases where
			 * the remote end is closing the channel. In these
			 * cases free the channel and transition to error
			 * state.
			 */
			qp = ibt_get_chan_private(event->cm_channel);
			conn = qptoc(qp);
			mutex_enter(&conn->c_lock);
			if (conn->c_state == C_DISCONN_PEND) {
				mutex_exit(&conn->c_lock);
				break;
			}

			conn->c_state = C_ERROR;

			/*
			 * Free the rc_channel.
Channel has already 1902 * transitioned to ERROR state and WRs have been 1903 * FLUSHED_ERR already. 1904 */ 1905 (void) ibt_free_channel(qp->qp_hdl); 1906 qp->qp_hdl = NULL; 1907 1908 /* 1909 * Free the conn if c_ref is down to 0 already 1910 */ 1911 if (conn->c_ref == 0) { 1912 /* 1913 * Remove from list and free conn 1914 */ 1915 conn->c_state = C_DISCONN_PEND; 1916 mutex_exit(&conn->c_lock); 1917 (void) rib_disconnect_channel(conn, 1918 &hca->cl_conn_list); 1919 } else { 1920 mutex_exit(&conn->c_lock); 1921 } 1922 #ifdef DEBUG 1923 if (rib_debug) 1924 cmn_err(CE_NOTE, "rib_clnt_cm_handler: " 1925 "(CONN_CLOSED) channel disconnected"); 1926 #endif 1927 break; 1928 } 1929 break; 1930 } 1931 default: 1932 break; 1933 } 1934 return (IBT_CM_ACCEPT); 1935 } 1936 1937 1938 /* Check if server has done ATS registration */ 1939 rdma_stat 1940 rib_chk_srv_ats(rib_hca_t *hca, struct netbuf *raddr, 1941 int addr_type, ibt_path_info_t *path) 1942 { 1943 struct sockaddr_in *sin4; 1944 struct sockaddr_in6 *sin6; 1945 ibt_path_attr_t path_attr; 1946 ibt_status_t ibt_status; 1947 ib_pkey_t pkey; 1948 ibt_ar_t ar_query, ar_result; 1949 rib_service_t *ats; 1950 ib_gid_t sgid; 1951 ibt_path_info_t paths[MAX_PORTS]; 1952 uint8_t npaths, i; 1953 1954 (void) bzero(&path_attr, sizeof (ibt_path_attr_t)); 1955 (void) bzero(path, sizeof (ibt_path_info_t)); 1956 1957 /* 1958 * Construct svc name 1959 */ 1960 path_attr.pa_sname = kmem_zalloc(IB_SVC_NAME_LEN, KM_SLEEP); 1961 switch (addr_type) { 1962 case AF_INET: 1963 sin4 = (struct sockaddr_in *)raddr->buf; 1964 (void) inet_ntop(AF_INET, &sin4->sin_addr, path_attr.pa_sname, 1965 IB_SVC_NAME_LEN); 1966 break; 1967 1968 case AF_INET6: 1969 sin6 = (struct sockaddr_in6 *)raddr->buf; 1970 (void) inet_ntop(AF_INET6, &sin6->sin6_addr, 1971 path_attr.pa_sname, IB_SVC_NAME_LEN); 1972 break; 1973 1974 default: 1975 kmem_free(path_attr.pa_sname, IB_SVC_NAME_LEN); 1976 return (RDMA_INVAL); 1977 } 1978 (void) strlcat(path_attr.pa_sname, "::NFS", IB_SVC_NAME_LEN); 1979 1980 /* 1981 * Attempt a path to the server on an ATS-registered port. 1982 * Try all ATS-registered ports until one succeeds. 1983 * The first one that succeeds will be used to connect 1984 * to the server. If none of them succeed, return RDMA_FAILED. 
1985 */ 1986 rw_enter(&hca->state_lock, RW_READER); 1987 if (hca->state != HCA_DETACHED) { 1988 rw_enter(&hca->service_list_lock, RW_READER); 1989 for (ats = hca->ats_list; ats != NULL; ats = ats->srv_next) { 1990 path_attr.pa_hca_guid = hca->hca_guid; 1991 path_attr.pa_hca_port_num = ats->srv_port; 1992 ibt_status = ibt_get_paths(hca->ibt_clnt_hdl, 1993 IBT_PATH_MULTI_SVC_DEST, &path_attr, 2, paths, &npaths); 1994 if (ibt_status == IBT_SUCCESS || 1995 ibt_status == IBT_INSUFF_DATA) { 1996 for (i = 0; i < npaths; i++) { 1997 if (paths[i].pi_hca_guid) { 1998 /* 1999 * do ibt_query_ar() 2000 */ 2001 sgid = 2002 paths[i].pi_prim_cep_path.cep_adds_vect.av_sgid; 2003 2004 (void) ibt_index2pkey_byguid(paths[i].pi_hca_guid, 2005 paths[i].pi_prim_cep_path.cep_hca_port_num, 2006 paths[i].pi_prim_cep_path.cep_pkey_ix, &pkey); 2007 2008 bzero(&ar_query, sizeof (ar_query)); 2009 bzero(&ar_result, sizeof (ar_result)); 2010 ar_query.ar_gid = 2011 paths[i].pi_prim_cep_path.cep_adds_vect.av_dgid; 2012 ar_query.ar_pkey = pkey; 2013 ibt_status = ibt_query_ar(&sgid, &ar_query, 2014 &ar_result); 2015 if (ibt_status == IBT_SUCCESS) { 2016 #ifdef DEBUG 2017 if (rib_debug > 1) 2018 rib_dump_pathrec(&paths[i]); 2019 #endif 2020 bcopy(&paths[i], path, 2021 sizeof (ibt_path_info_t)); 2022 rw_exit(&hca->service_list_lock); 2023 kmem_free(path_attr.pa_sname, IB_SVC_NAME_LEN); 2024 rw_exit(&hca->state_lock); 2025 return (RDMA_SUCCESS); 2026 } 2027 #ifdef DEBUG 2028 if (rib_debug) { 2029 cmn_err(CE_NOTE, "rib_chk_srv_ats: " 2030 "ibt_query_ar FAILED, return\n"); 2031 } 2032 #endif 2033 } 2034 } 2035 } 2036 } 2037 rw_exit(&hca->service_list_lock); 2038 } 2039 kmem_free(path_attr.pa_sname, IB_SVC_NAME_LEN); 2040 rw_exit(&hca->state_lock); 2041 return (RDMA_FAILED); 2042 } 2043 2044 2045 /* 2046 * Connect to the server. 
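 * An RC channel is allocated against the client's send/recv CQs and
 * the HCA PD, then opened with ibt_open_rc_channel() in blocking mode
 * over the supplied path.  A reject with IBT_CM_CONN_STALE (stale
 * state on the passive side, e.g. across a server reboot) frees the
 * channel and retries with a fresh one, up to REFRESH_ATTEMPTS times.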
2047 */ 2048 rdma_stat 2049 rib_conn_to_srv(rib_hca_t *hca, rib_qp_t *qp, ibt_path_info_t *path) 2050 { 2051 ibt_chan_open_args_t chan_args; /* channel args */ 2052 ibt_chan_sizes_t chan_sizes; 2053 ibt_rc_chan_alloc_args_t qp_attr; 2054 ibt_status_t ibt_status; 2055 ibt_rc_returns_t ret_args; /* conn reject info */ 2056 int refresh = REFRESH_ATTEMPTS; /* refresh if IBT_CM_CONN_STALE */ 2057 2058 (void) bzero(&chan_args, sizeof (chan_args)); 2059 (void) bzero(&qp_attr, sizeof (ibt_rc_chan_alloc_args_t)); 2060 2061 qp_attr.rc_hca_port_num = path->pi_prim_cep_path.cep_hca_port_num; 2062 /* Alloc a RC channel */ 2063 qp_attr.rc_scq = hca->clnt_scq->rib_cq_hdl; 2064 qp_attr.rc_rcq = hca->clnt_rcq->rib_cq_hdl; 2065 qp_attr.rc_pd = hca->pd_hdl; 2066 qp_attr.rc_sizes.cs_sq_sgl = DSEG_MAX; 2067 qp_attr.rc_sizes.cs_rq_sgl = RQ_DSEG_MAX; 2068 qp_attr.rc_sizes.cs_sq = DEF_SQ_SIZE; 2069 qp_attr.rc_sizes.cs_rq = DEF_RQ_SIZE; 2070 qp_attr.rc_clone_chan = NULL; 2071 qp_attr.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR; 2072 qp_attr.rc_flags = IBT_WR_SIGNALED; 2073 2074 chan_args.oc_path = path; 2075 chan_args.oc_cm_handler = rib_clnt_cm_handler; 2076 chan_args.oc_cm_clnt_private = (void *)rib_stat; 2077 chan_args.oc_rdma_ra_out = 4; 2078 chan_args.oc_rdma_ra_in = 4; 2079 chan_args.oc_path_retry_cnt = 2; 2080 chan_args.oc_path_rnr_retry_cnt = RNR_RETRIES; 2081 2082 refresh: 2083 rw_enter(&hca->state_lock, RW_READER); 2084 if (hca->state != HCA_DETACHED) { 2085 ibt_status = ibt_alloc_rc_channel(hca->hca_hdl, 2086 IBT_ACHAN_NO_FLAGS, &qp_attr, &qp->qp_hdl, 2087 &chan_sizes); 2088 } else { 2089 rw_exit(&hca->state_lock); 2090 return (RDMA_FAILED); 2091 } 2092 rw_exit(&hca->state_lock); 2093 2094 if (ibt_status != IBT_SUCCESS) { 2095 #ifdef DEBUG 2096 cmn_err(CE_WARN, "rib_conn_to_srv: alloc_rc_channel " 2097 "failed, ibt_status=%d.", ibt_status); 2098 #endif 2099 return (RDMA_FAILED); 2100 } 2101 2102 /* Connect to the Server */ 2103 (void) bzero(&ret_args, sizeof (ret_args)); 2104 mutex_enter(&qp->cb_lock); 2105 ibt_status = ibt_open_rc_channel(qp->qp_hdl, IBT_OCHAN_NO_FLAGS, 2106 IBT_BLOCKING, &chan_args, &ret_args); 2107 if (ibt_status != IBT_SUCCESS) { 2108 #ifdef DEBUG 2109 if (rib_debug) 2110 cmn_err(CE_WARN, "rib_conn_to_srv: open_rc_channel" 2111 " failed for qp %p, status=%d, " 2112 "ret_args.rc_status=%d\n", 2113 (void *)qp, ibt_status, ret_args.rc_status); 2114 #endif 2115 (void) ibt_free_channel(qp->qp_hdl); 2116 qp->qp_hdl = NULL; 2117 mutex_exit(&qp->cb_lock); 2118 if (refresh-- && ibt_status == IBT_CM_FAILURE && 2119 ret_args.rc_status == IBT_CM_CONN_STALE) { 2120 /* 2121 * Got IBT_CM_CONN_STALE probably because of stale 2122 * data on the passive end of a channel that existed 2123 * prior to reboot. Retry establishing a channel 2124 * REFRESH_ATTEMPTS times, during which time the 2125 * stale conditions on the server might clear up. 
2126 */ 2127 goto refresh; 2128 } 2129 return (RDMA_FAILED); 2130 } 2131 mutex_exit(&qp->cb_lock); 2132 /* 2133 * Set the private data area to qp to be used in callbacks 2134 */ 2135 ibt_set_chan_private(qp->qp_hdl, (void *)qp); 2136 return (RDMA_SUCCESS); 2137 } 2138 2139 rdma_stat 2140 rib_ping_srv(int addr_type, struct netbuf *raddr, rib_hca_t **hca) 2141 { 2142 struct sockaddr_in *sin4; 2143 struct sockaddr_in6 *sin6; 2144 ibt_path_attr_t path_attr; 2145 ibt_path_info_t path; 2146 ibt_status_t ibt_status; 2147 2148 ASSERT(raddr->buf != NULL); 2149 2150 bzero(&path_attr, sizeof (ibt_path_attr_t)); 2151 bzero(&path, sizeof (ibt_path_info_t)); 2152 2153 /* 2154 * Conctruct svc name 2155 */ 2156 path_attr.pa_sname = kmem_zalloc(IB_SVC_NAME_LEN, KM_SLEEP); 2157 switch (addr_type) { 2158 case AF_INET: 2159 sin4 = (struct sockaddr_in *)raddr->buf; 2160 (void) inet_ntop(AF_INET, &sin4->sin_addr, path_attr.pa_sname, 2161 IB_SVC_NAME_LEN); 2162 break; 2163 2164 case AF_INET6: 2165 sin6 = (struct sockaddr_in6 *)raddr->buf; 2166 (void) inet_ntop(AF_INET6, &sin6->sin6_addr, 2167 path_attr.pa_sname, IB_SVC_NAME_LEN); 2168 break; 2169 2170 default: 2171 #ifdef DEBUG 2172 if (rib_debug) { 2173 cmn_err(CE_WARN, "rib_ping_srv: Address not recognized\n"); 2174 } 2175 #endif 2176 kmem_free(path_attr.pa_sname, IB_SVC_NAME_LEN); 2177 return (RDMA_INVAL); 2178 } 2179 (void) strlcat(path_attr.pa_sname, "::NFS", IB_SVC_NAME_LEN); 2180 2181 ibt_status = ibt_get_paths(rib_stat->ibt_clnt_hdl, 2182 IBT_PATH_NO_FLAGS, &path_attr, 1, &path, NULL); 2183 kmem_free(path_attr.pa_sname, IB_SVC_NAME_LEN); 2184 if (ibt_status != IBT_SUCCESS) { 2185 if (rib_debug > 1) { 2186 cmn_err(CE_WARN, "rib_ping_srv: ibt_get_paths FAILED!" 2187 " status=%d\n", ibt_status); 2188 } 2189 } else if (path.pi_hca_guid) { 2190 ASSERT(path.pi_hca_guid == rib_stat->hca->hca_guid); 2191 *hca = rib_stat->hca; 2192 return (RDMA_SUCCESS); 2193 } 2194 return (RDMA_FAILED); 2195 } 2196 2197 /* 2198 * Close channel, remove from connection list and 2199 * free up resources allocated for that channel. 2200 */ 2201 rdma_stat 2202 rib_disconnect_channel(CONN *conn, rib_conn_list_t *conn_list) 2203 { 2204 rib_qp_t *qp = ctoqp(conn); 2205 rib_hca_t *hca; 2206 2207 /* 2208 * c_ref == 0 and connection is in C_DISCONN_PEND 2209 */ 2210 hca = qp->hca; 2211 if (conn_list != NULL) 2212 (void) rib_rm_conn(conn, conn_list); 2213 if (qp->qp_hdl != NULL) { 2214 /* 2215 * If the channel has not been establised, 2216 * ibt_flush_channel is called to flush outstanding WRs 2217 * on the Qs. Otherwise, ibt_close_rc_channel() is 2218 * called. The channel is then freed. 
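 * A NULL conn_list is the not-yet-established case: the qp is being
 * torn down before it was ever put on a connection list, so only a
 * flush of the work queues is needed before the posted receive
 * buffers are drained and the channel is freed.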
2219 */ 2220 if (conn_list != NULL) 2221 (void) ibt_close_rc_channel(qp->qp_hdl, 2222 IBT_BLOCKING, NULL, 0, NULL, NULL, 0); 2223 else 2224 (void) ibt_flush_channel(qp->qp_hdl); 2225 2226 mutex_enter(&qp->posted_rbufs_lock); 2227 while (qp->n_posted_rbufs) 2228 cv_wait(&qp->posted_rbufs_cv, &qp->posted_rbufs_lock); 2229 mutex_exit(&qp->posted_rbufs_lock); 2230 (void) ibt_free_channel(qp->qp_hdl); 2231 qp->qp_hdl = NULL; 2232 } 2233 ASSERT(qp->rdlist == NULL); 2234 if (qp->replylist != NULL) { 2235 (void) rib_rem_replylist(qp); 2236 } 2237 2238 cv_destroy(&qp->cb_conn_cv); 2239 cv_destroy(&qp->posted_rbufs_cv); 2240 mutex_destroy(&qp->cb_lock); 2241 2242 mutex_destroy(&qp->replylist_lock); 2243 mutex_destroy(&qp->posted_rbufs_lock); 2244 mutex_destroy(&qp->rdlist_lock); 2245 2246 cv_destroy(&conn->c_cv); 2247 mutex_destroy(&conn->c_lock); 2248 2249 if (conn->c_raddr.buf != NULL) { 2250 kmem_free(conn->c_raddr.buf, conn->c_raddr.len); 2251 } 2252 if (conn->c_laddr.buf != NULL) { 2253 kmem_free(conn->c_laddr.buf, conn->c_laddr.len); 2254 } 2255 2256 /* 2257 * Credit control cleanup. 2258 */ 2259 if (qp->rdmaconn.c_cc_type == RDMA_CC_CLNT) { 2260 rdma_clnt_cred_ctrl_t *cc_info; 2261 cc_info = &qp->rdmaconn.rdma_conn_cred_ctrl_u.c_clnt_cc; 2262 cv_destroy(&cc_info->clnt_cc_cv); 2263 } 2264 2265 kmem_free(qp, sizeof (rib_qp_t)); 2266 2267 /* 2268 * If HCA has been DETACHED and the srv/clnt_conn_list is NULL, 2269 * then the hca is no longer being used. 2270 */ 2271 if (conn_list != NULL) { 2272 rw_enter(&hca->state_lock, RW_READER); 2273 if (hca->state == HCA_DETACHED) { 2274 rw_enter(&hca->srv_conn_list.conn_lock, RW_READER); 2275 if (hca->srv_conn_list.conn_hd == NULL) { 2276 rw_enter(&hca->cl_conn_list.conn_lock, 2277 RW_READER); 2278 if (hca->cl_conn_list.conn_hd == NULL) { 2279 mutex_enter(&hca->inuse_lock); 2280 hca->inuse = FALSE; 2281 cv_signal(&hca->cb_cv); 2282 mutex_exit(&hca->inuse_lock); 2283 } 2284 rw_exit(&hca->cl_conn_list.conn_lock); 2285 } 2286 rw_exit(&hca->srv_conn_list.conn_lock); 2287 } 2288 rw_exit(&hca->state_lock); 2289 } 2290 2291 num_clients--; 2292 return (RDMA_SUCCESS); 2293 } 2294 2295 #ifdef DYNAMIC_CREDIT_CONTROL 2296 void rib_get_resource_info(CONN *conn, int *current_clients, int *avail_bufs) 2297 { 2298 rib_qp_t *qp = ctoqp(conn); 2299 rib_hca_t *hca = qp->hca; 2300 rib_bufpool_t *rbp = NULL; 2301 bufpool_t *bp; 2302 2303 is_server = 1; 2304 rbp = hca->recv_pool; 2305 2306 if (rbp == NULL) 2307 *avail_bufs = 0; 2308 else { 2309 bp = rbp->bpool; 2310 *avail_bufs = bp->buffree; 2311 } 2312 2313 *current_clients = num_clients; 2314 } 2315 #endif 2316 2317 /* 2318 * Wait for send completion notification. Only on receiving a 2319 * notification be it a successful or error completion, free the 2320 * send_wid. 
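 * The wait is bounded by SEND_WAIT_TIME seconds.  On the server side
 * a plain cv_timedwait() is used; on the client side cv_timedwait_sig()
 * is used so that a pending signal aborts the wait with RDMA_INTR.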
2321 */ 2322 static rdma_stat 2323 rib_sendwait(rib_qp_t *qp, struct send_wid *wd) 2324 { 2325 clock_t timout, cv_wait_ret; 2326 rdma_stat error = RDMA_SUCCESS; 2327 int i; 2328 2329 /* 2330 * Wait for send to complete 2331 */ 2332 ASSERT(wd != NULL); 2333 mutex_enter(&wd->sendwait_lock); 2334 if (wd->status == (uint_t)SEND_WAIT) { 2335 timout = drv_usectohz(SEND_WAIT_TIME * 1000000) + 2336 ddi_get_lbolt(); 2337 if (qp->mode == RIB_SERVER) { 2338 while ((cv_wait_ret = cv_timedwait(&wd->wait_cv, 2339 &wd->sendwait_lock, timout)) > 0 && 2340 wd->status == (uint_t)SEND_WAIT) 2341 ; 2342 switch (cv_wait_ret) { 2343 case -1: /* timeout */ 2344 #ifdef DEBUG 2345 if (rib_debug > 2) 2346 cmn_err(CE_WARN, "rib_sendwait: " 2347 "timed out qp %p\n", (void *)qp); 2348 #endif 2349 wd->cv_sig = 0; /* no signal needed */ 2350 error = RDMA_TIMEDOUT; 2351 break; 2352 default: /* got send completion */ 2353 break; 2354 } 2355 } else { 2356 while ((cv_wait_ret = cv_timedwait_sig(&wd->wait_cv, 2357 &wd->sendwait_lock, timout)) > 0 && 2358 wd->status == (uint_t)SEND_WAIT) 2359 ; 2360 switch (cv_wait_ret) { 2361 case -1: /* timeout */ 2362 #ifdef DEBUG 2363 if (rib_debug > 2) 2364 cmn_err(CE_WARN, "rib_sendwait: " 2365 "timed out qp %p\n", (void *)qp); 2366 #endif 2367 wd->cv_sig = 0; /* no signal needed */ 2368 error = RDMA_TIMEDOUT; 2369 break; 2370 case 0: /* interrupted */ 2371 #ifdef DEBUG 2372 if (rib_debug > 2) 2373 cmn_err(CE_NOTE, "rib_sendwait:" 2374 " interrupted on qp %p\n", 2375 (void *)qp); 2376 #endif 2377 wd->cv_sig = 0; /* no signal needed */ 2378 error = RDMA_INTR; 2379 break; 2380 default: /* got send completion */ 2381 break; 2382 } 2383 } 2384 } 2385 2386 if (wd->status != (uint_t)SEND_WAIT) { 2387 /* got send completion */ 2388 if (wd->status != RDMA_SUCCESS) { 2389 error = wd->status; 2390 if (wd->status != RDMA_CONNLOST) 2391 error = RDMA_FAILED; 2392 } 2393 for (i = 0; i < wd->nsbufs; i++) { 2394 rib_rbuf_free(qptoc(qp), SEND_BUFFER, 2395 (void *)(uintptr_t)wd->sbufaddr[i]); 2396 } 2397 mutex_exit(&wd->sendwait_lock); 2398 (void) rib_free_sendwait(wd); 2399 } else { 2400 mutex_exit(&wd->sendwait_lock); 2401 } 2402 2403 return (error); 2404 } 2405 2406 static struct send_wid * 2407 rib_init_sendwait(uint32_t xid, int cv_sig, rib_qp_t *qp) 2408 { 2409 struct send_wid *wd; 2410 2411 wd = kmem_zalloc(sizeof (struct send_wid), KM_SLEEP); 2412 wd->xid = xid; 2413 wd->cv_sig = cv_sig; 2414 wd->qp = qp; 2415 cv_init(&wd->wait_cv, NULL, CV_DEFAULT, NULL); 2416 mutex_init(&wd->sendwait_lock, NULL, MUTEX_DRIVER, NULL); 2417 wd->status = (uint_t)SEND_WAIT; 2418 2419 return (wd); 2420 } 2421 2422 static int 2423 rib_free_sendwait(struct send_wid *wdesc) 2424 { 2425 cv_destroy(&wdesc->wait_cv); 2426 mutex_destroy(&wdesc->sendwait_lock); 2427 kmem_free(wdesc, sizeof (*wdesc)); 2428 2429 return (0); 2430 } 2431 2432 static rdma_stat 2433 rib_rem_rep(rib_qp_t *qp, struct reply *rep) 2434 { 2435 mutex_enter(&qp->replylist_lock); 2436 if (rep != NULL) { 2437 (void) rib_remreply(qp, rep); 2438 mutex_exit(&qp->replylist_lock); 2439 return (RDMA_SUCCESS); 2440 } 2441 mutex_exit(&qp->replylist_lock); 2442 return (RDMA_FAILED); 2443 } 2444 2445 /* 2446 * Send buffers are freed here only in case of error in posting 2447 * on QP. If the post succeeded, the send buffers are freed upon 2448 * send completion in rib_sendwait() or in the scq_handler. 
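 * The clist is gathered into at most DSEG_MAX scatter/gather entries,
 * and the buffer addresses are recorded in the send_wid so that
 * whichever path reaps the completion can free them.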
2449 */ 2450 rdma_stat 2451 #if defined(ASYNC_SERVER_DEREG) 2452 rib_send_and_wait(CONN *conn, struct clist *cl, uint32_t msgid, 2453 int send_sig, int cv_sig, caddr_t c, caddr_t c1, int l1, caddr_t c2, int l2, int l3, int l4) 2454 #else 2455 rib_send_and_wait(CONN *conn, struct clist *cl, uint32_t msgid, 2456 int send_sig, int cv_sig, caddr_t *swid) 2457 #endif 2458 { 2459 struct send_wid *wdesc; 2460 struct clist *clp; 2461 ibt_status_t ibt_status = IBT_SUCCESS; 2462 rdma_stat ret = RDMA_SUCCESS; 2463 ibt_send_wr_t tx_wr; 2464 int i, nds; 2465 ibt_wr_ds_t sgl[DSEG_MAX]; 2466 uint_t total_msg_size; 2467 rib_qp_t *qp = ctoqp(conn); 2468 2469 ASSERT(cl != NULL); 2470 2471 bzero(&tx_wr, sizeof (ibt_send_wr_t)); 2472 2473 nds = 0; 2474 total_msg_size = 0; 2475 clp = cl; 2476 while (clp != NULL) { 2477 if (nds >= DSEG_MAX) { 2478 cmn_err(CE_WARN, "rib_send_and_wait: DSEG_MAX" 2479 " too small!"); 2480 return (RDMA_FAILED); 2481 } 2482 sgl[nds].ds_va = clp->c_saddr; 2483 sgl[nds].ds_key = clp->c_smemhandle.mrc_lmr; /* lkey */ 2484 sgl[nds].ds_len = clp->c_len; 2485 total_msg_size += clp->c_len; 2486 clp = clp->c_next; 2487 nds++; 2488 } 2489 2490 if (send_sig) { 2491 /* Set SEND_SIGNAL flag. */ 2492 tx_wr.wr_flags = IBT_WR_SEND_SIGNAL; 2493 wdesc = rib_init_sendwait(msgid, cv_sig, qp); 2494 *swid = (caddr_t)wdesc; 2495 } else { 2496 tx_wr.wr_flags = IBT_WR_NO_FLAGS; 2497 wdesc = rib_init_sendwait(msgid, 0, qp); 2498 *swid = (caddr_t)wdesc; 2499 } 2500 wdesc->nsbufs = nds; 2501 #if defined(ASYNC_SERVER_DEREG) 2502 wdesc->c = c; 2503 wdesc->c1 = c1; 2504 wdesc->c2 = c2; 2505 wdesc->l1 = l1; 2506 wdesc->l2 = l2; 2507 wdesc->wl = l3; 2508 wdesc->rl = l4; 2509 #endif 2510 for (i = 0; i < nds; i++) { 2511 wdesc->sbufaddr[i] = sgl[i].ds_va; 2512 } 2513 2514 tx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc; 2515 tx_wr.wr_opcode = IBT_WRC_SEND; 2516 tx_wr.wr_trans = IBT_RC_SRV; 2517 tx_wr.wr_nds = nds; 2518 tx_wr.wr_sgl = sgl; 2519 2520 mutex_enter(&conn->c_lock); 2521 if (conn->c_state & C_CONNECTED) { 2522 ibt_status = ibt_post_send(qp->qp_hdl, &tx_wr, 1, NULL); 2523 } 2524 if (((conn->c_state & C_CONNECTED) == 0) || 2525 ibt_status != IBT_SUCCESS) { 2526 mutex_exit(&conn->c_lock); 2527 for (i = 0; i < nds; i++) { 2528 rib_rbuf_free(conn, SEND_BUFFER, 2529 (void *)(uintptr_t)wdesc->sbufaddr[i]); 2530 } 2531 (void) rib_free_sendwait(wdesc); 2532 #ifdef DEBUG 2533 if (rib_debug && ibt_status != IBT_SUCCESS) 2534 cmn_err(CE_WARN, "rib_send_and_wait: ibt_post_send " 2535 "failed! wr_id %llx on qpn %p, status=%d!", 2536 (longlong_t)tx_wr.wr_id, (void *)qp, 2537 ibt_status); 2538 #endif 2539 return (RDMA_FAILED); 2540 } 2541 mutex_exit(&conn->c_lock); 2542 2543 if (send_sig) { 2544 if (cv_sig) { 2545 /* 2546 * cv_wait for send to complete. 2547 * We can fail due to a timeout or signal or 2548 * unsuccessful send. 
2549 */ 2550 ret = rib_sendwait(qp, wdesc); 2551 #ifdef DEBUG 2552 if (rib_debug > 2) 2553 if (ret != 0) { 2554 cmn_err(CE_WARN, "rib_send_and_wait: rib_sendwait " 2555 "FAILED, rdma stat=%d, wr_id %llx, qp %p!", 2556 ret, (longlong_t)tx_wr.wr_id, (void *)qp); 2557 } 2558 #endif 2559 return (ret); 2560 } 2561 } 2562 2563 return (RDMA_SUCCESS); 2564 } 2565 2566 #if defined (CLNT_INTERRUPT_COAL) 2567 rdma_stat 2568 rib_send_bl(CONN *conn, struct clist *cl, uint32_t msgid) 2569 { 2570 rdma_stat ret; 2571 struct send_wid *sd, dlist; 2572 rib_qp_t *qp = ctoqp(conn); 2573 caddr_t wd; 2574 mutex_enter(&conn->c_lock); 2575 if ((conn->c_count + 1) >= (preposted_rbufs / 2)) { 2576 conn->c_count = 0; 2577 dlist.forw = dlist.back = &dlist; 2578 while (qp->wd.forw != &qp->wd) { 2579 sd = qp->wd.forw; 2580 remque(sd); 2581 insque(sd, &dlist); 2582 } 2583 mutex_exit(&conn->c_lock); 2584 ret = rib_send_and_wait(conn, cl, msgid, 1, 1, &wd); 2585 while (dlist.forw != &dlist) { 2586 sd = dlist.forw; 2587 remque(dlist.forw); 2588 rib_scq_free((caddr_t)sd); 2589 } 2590 } else { 2591 mutex_exit(&conn->c_lock); 2592 wd = 0; 2593 ret = rib_send_and_wait(conn, cl, msgid, 0, 0, &wd); 2594 mutex_enter(&conn->c_lock); 2595 conn->c_count++; 2596 insque(wd, &qp->wd); 2597 mutex_exit(&conn->c_lock); 2598 } 2599 return (ret); 2600 } 2601 #endif 2602 2603 rdma_stat 2604 rib_send(CONN *conn, struct clist *cl, uint32_t msgid) 2605 { 2606 rdma_stat ret; caddr_t wd; 2607 /* send-wait & cv_signal */ 2608 #if defined(ASYNC_SERVER_DEREG) 2609 ret = rib_send_and_wait(conn, cl, msgid, 1, 1, 0, 0, 0, 0, 0, 0, 0, &wd); 2610 #else 2611 ret = rib_send_and_wait(conn, cl, msgid, 1, 1, &wd); 2612 #endif 2613 return (ret); 2614 } 2615 2616 #if defined(ASYNC_SERVER_DEREG) 2617 rdma_stat 2618 rib_send_nw(CONN *conn, struct clist *cl, uint32_t msgid, caddr_t c, caddr_t c1, int c2, caddr_t c3, int c4, int c5, int c6) 2619 { 2620 rdma_stat ret; 2621 caddr_t *wid; 2622 /* send-wait & cv_signal */ 2623 ret = rib_send_and_wait(conn, cl, msgid, 1, 0, c, c1, c2, c3, c4, c5, c6, wid); 2624 2625 return (ret); 2626 } 2627 #endif 2628 /* 2629 * Server interface (svc_rdma_ksend). 2630 * Send RPC reply and wait for RDMA_DONE.
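 * After the reply is posted, the server blocks on the rdma_done list
 * entry for up to REPLY_WAIT_TIME seconds waiting for the client's
 * RDMA_DONE; expiry of that wait is reported as RDMA_TIMEDOUT.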
2631 */ 2632 rdma_stat 2633 rib_send_resp(CONN *conn, struct clist *cl, uint32_t msgid) 2634 { 2635 rdma_stat ret = RDMA_SUCCESS; 2636 struct rdma_done_list *rd; 2637 clock_t timout, cv_wait_ret; 2638 caddr_t *wid; 2639 rib_qp_t *qp = ctoqp(conn); 2640 2641 mutex_enter(&qp->rdlist_lock); 2642 rd = rdma_done_add(qp, msgid); 2643 2644 /* No cv_signal (whether send-wait or no-send-wait) */ 2645 #if defined(ASYNC_SERVER_DEREG) 2646 ret = rib_send_and_wait(conn, cl, msgid, 1, 0, 0, 0, 0, 0, 0, 0, 0, wid); 2647 #else 2648 ret = rib_send_and_wait(conn, cl, msgid, 1, 0, wid); 2649 #endif 2650 if (ret != RDMA_SUCCESS) { 2651 #ifdef DEBUG 2652 cmn_err(CE_WARN, "rib_send_resp: send_and_wait " 2653 "failed, msgid %u, qp %p", msgid, (void *)qp); 2654 #endif 2655 rdma_done_rm(qp, rd); 2656 goto done; 2657 } 2658 2659 /* 2660 * Wait for RDMA_DONE from remote end 2661 */ 2662 timout = drv_usectohz(REPLY_WAIT_TIME * 1000000) + ddi_get_lbolt(); 2663 cv_wait_ret = cv_timedwait(&rd->rdma_done_cv, &qp->rdlist_lock, 2664 timout); 2665 rdma_done_rm(qp, rd); 2666 if (cv_wait_ret < 0) { 2667 #ifdef DEBUG 2668 if (rib_debug > 1) { 2669 cmn_err(CE_WARN, "rib_send_resp: RDMA_DONE not" 2670 " recv'd for qp %p, xid:%u\n", 2671 (void *)qp, msgid); 2672 } 2673 #endif 2674 ret = RDMA_TIMEDOUT; 2675 goto done; 2676 } 2677 2678 done: 2679 mutex_exit(&qp->rdlist_lock); 2680 return (ret); 2681 } 2682 2683 static struct recv_wid * 2684 rib_create_wid(rib_qp_t *qp, ibt_wr_ds_t *sgl, uint32_t msgid) 2685 { 2686 struct recv_wid *rwid; 2687 2688 rwid = kmem_zalloc(sizeof (struct recv_wid), KM_SLEEP); 2689 rwid->xid = msgid; 2690 rwid->addr = sgl->ds_va; 2691 rwid->qp = qp; 2692 2693 return (rwid); 2694 } 2695 2696 static void 2697 rib_free_wid(struct recv_wid *rwid) 2698 { 2699 kmem_free(rwid, sizeof (struct recv_wid)); 2700 } 2701 2702 rdma_stat 2703 rib_clnt_post(CONN* conn, struct clist *cl, uint32_t msgid) 2704 { 2705 rib_qp_t *qp = ctoqp(conn); 2706 struct clist *clp = cl; 2707 struct reply *rep; 2708 struct recv_wid *rwid; 2709 int nds; 2710 ibt_wr_ds_t sgl[DSEG_MAX]; 2711 ibt_recv_wr_t recv_wr; 2712 rdma_stat ret; 2713 ibt_status_t ibt_status; 2714 2715 /* 2716 * rdma_clnt_postrecv uses RECV_BUFFER. 
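 * Exactly one receive segment is expected per post.  The recv_wid
 * carrying the caller's XID is used as the work request ID, and a
 * matching entry is added to the reply list via rib_addreplylist()
 * so the receive completion can later be paired with rib_recv().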
2717 */ 2718 2719 nds = 0; 2720 while (cl != NULL) { 2721 if (nds >= DSEG_MAX) { 2722 cmn_err(CE_WARN, "rib_clnt_post: DSEG_MAX too small!"); 2723 ret = RDMA_FAILED; 2724 goto done; 2725 } 2726 sgl[nds].ds_va = cl->c_saddr; 2727 sgl[nds].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */ 2728 sgl[nds].ds_len = cl->c_len; 2729 cl = cl->c_next; 2730 nds++; 2731 } 2732 2733 if (nds != 1) { 2734 cmn_err(CE_WARN, "rib_clnt_post: nds!=1\n"); 2735 ret = RDMA_FAILED; 2736 goto done; 2737 } 2738 bzero(&recv_wr, sizeof (ibt_recv_wr_t)); 2739 recv_wr.wr_nds = nds; 2740 recv_wr.wr_sgl = sgl; 2741 2742 rwid = rib_create_wid(qp, &sgl[0], msgid); 2743 if (rwid) { 2744 recv_wr.wr_id = (ibt_wrid_t)(uintptr_t)rwid; 2745 } else { 2746 cmn_err(CE_WARN, "rib_clnt_post: out of memory"); 2747 ret = RDMA_NORESOURCE; 2748 goto done; 2749 } 2750 rep = rib_addreplylist(qp, msgid); 2751 if (!rep) { 2752 cmn_err(CE_WARN, "rib_clnt_post: out of memory"); 2753 rib_free_wid(rwid); 2754 ret = RDMA_NORESOURCE; 2755 goto done; 2756 } 2757 2758 mutex_enter(&conn->c_lock); 2759 if (conn->c_state & C_CONNECTED) { 2760 ibt_status = ibt_post_recv(qp->qp_hdl, &recv_wr, 1, NULL); 2761 } 2762 if (((conn->c_state & C_CONNECTED) == 0) || 2763 ibt_status != IBT_SUCCESS) { 2764 mutex_exit(&conn->c_lock); 2765 #ifdef DEBUG 2766 cmn_err(CE_WARN, "rib_clnt_post: QPN %p failed in " 2767 "ibt_post_recv(), msgid=%d, status=%d", 2768 (void *)qp, msgid, ibt_status); 2769 #endif 2770 rib_free_wid(rwid); 2771 (void) rib_rem_rep(qp, rep); 2772 ret = RDMA_FAILED; 2773 goto done; 2774 } 2775 mutex_exit(&conn->c_lock); 2776 return (RDMA_SUCCESS); 2777 2778 done: 2779 while (clp != NULL) { 2780 rib_rbuf_free(conn, RECV_BUFFER, (void *)(uintptr_t)clp->c_saddr); 2781 clp = clp->c_next; 2782 } 2783 return (ret); 2784 } 2785 2786 rdma_stat 2787 rib_svc_post(CONN* conn, struct clist *cl) 2788 { 2789 rib_qp_t *qp = ctoqp(conn); 2790 struct svc_recv *s_recvp; 2791 int nds; 2792 ibt_wr_ds_t sgl[DSEG_MAX]; 2793 ibt_recv_wr_t recv_wr; 2794 ibt_status_t ibt_status; 2795 2796 nds = 0; 2797 while (cl != NULL) { 2798 if (nds >= DSEG_MAX) { 2799 cmn_err(CE_WARN, "rib_svc_post: DSEG_MAX too small!"); 2800 return (RDMA_FAILED); 2801 } 2802 sgl[nds].ds_va = cl->c_saddr; 2803 sgl[nds].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */ 2804 sgl[nds].ds_len = cl->c_len; 2805 cl = cl->c_next; 2806 nds++; 2807 } 2808 2809 if (nds != 1) { 2810 cmn_err(CE_WARN, "rib_svc_post: nds!=1\n"); 2811 rib_rbuf_free(conn, RECV_BUFFER, (caddr_t)(uintptr_t)sgl[0].ds_va); 2812 return (RDMA_FAILED); 2813 } 2814 bzero(&recv_wr, sizeof (ibt_recv_wr_t)); 2815 recv_wr.wr_nds = nds; 2816 recv_wr.wr_sgl = sgl; 2817 2818 s_recvp = rib_init_svc_recv(qp, &sgl[0]); 2819 /* Use s_recvp's addr as wr id */ 2820 recv_wr.wr_id = (ibt_wrid_t)(uintptr_t)s_recvp; 2821 mutex_enter(&conn->c_lock); 2822 if (conn->c_state & C_CONNECTED) { 2823 ibt_status = ibt_post_recv(qp->qp_hdl, &recv_wr, 1, NULL); 2824 } 2825 if (((conn->c_state & C_CONNECTED) == 0) || 2826 ibt_status != IBT_SUCCESS) { 2827 mutex_exit(&conn->c_lock); 2828 #ifdef DEBUG 2829 cmn_err(CE_WARN, "rib_svc_post: QP %p failed in " 2830 "ibt_post_recv(), status=%d", 2831 (void *)qp, ibt_status); 2832 #endif 2833 rib_rbuf_free(conn, RECV_BUFFER, 2834 (caddr_t)(uintptr_t)sgl[0].ds_va); 2835 (void) rib_free_svc_recv(s_recvp); 2836 return (RDMA_FAILED); 2837 } 2838 mutex_exit(&conn->c_lock); 2839 2840 return (RDMA_SUCCESS); 2841 } 2842 2843 /* Client */ 2844 rdma_stat 2845 rib_post_resp(CONN* conn, struct clist *cl, uint32_t msgid) 2846 { 2847 2848 return 
(rib_clnt_post(conn, cl, msgid)); 2849 } 2850 2851 /* Server */ 2852 rdma_stat 2853 rib_post_recv(CONN *conn, struct clist *cl) 2854 { 2855 rib_qp_t *qp = ctoqp(conn); 2856 2857 if (rib_svc_post(conn, cl) == RDMA_SUCCESS) { 2858 mutex_enter(&qp->posted_rbufs_lock); 2859 qp->n_posted_rbufs++; 2860 mutex_exit(&qp->posted_rbufs_lock); 2861 return (RDMA_SUCCESS); 2862 } 2863 return (RDMA_FAILED); 2864 } 2865 2866 /* 2867 * Client side only interface to "recv" the rpc reply buf 2868 * posted earlier by rib_post_resp(conn, cl, msgid). 2869 */ 2870 rdma_stat 2871 rib_recv(CONN *conn, struct clist **clp, uint32_t msgid) 2872 { 2873 struct reply *rep = NULL; 2874 clock_t timout, cv_wait_ret; 2875 rdma_stat ret = RDMA_SUCCESS; 2876 rib_qp_t *qp = ctoqp(conn); 2877 2878 /* 2879 * Find the reply structure for this msgid 2880 */ 2881 mutex_enter(&qp->replylist_lock); 2882 2883 for (rep = qp->replylist; rep != NULL; rep = rep->next) { 2884 if (rep->xid == msgid) 2885 break; 2886 } 2887 if (rep != NULL) { 2888 /* 2889 * If message not yet received, wait. 2890 */ 2891 if (rep->status == (uint_t)REPLY_WAIT) { 2892 timout = ddi_get_lbolt() + 2893 drv_usectohz(REPLY_WAIT_TIME * 1000000); 2894 while ((cv_wait_ret = cv_timedwait_sig(&rep->wait_cv, 2895 &qp->replylist_lock, timout)) > 0 && 2896 rep->status == (uint_t)REPLY_WAIT); 2897 2898 switch (cv_wait_ret) { 2899 case -1: /* timeout */ 2900 ret = RDMA_TIMEDOUT; 2901 break; 2902 case 0: 2903 ret = RDMA_INTR; 2904 break; 2905 default: 2906 break; 2907 } 2908 } 2909 2910 if (rep->status == RDMA_SUCCESS) { 2911 struct clist *cl = NULL; 2912 2913 /* 2914 * Got message successfully 2915 */ 2916 clist_add(&cl, 0, rep->bytes_xfer, NULL, 2917 (caddr_t)(uintptr_t)rep->vaddr_cq, NULL, NULL); 2918 *clp = cl; 2919 } else { 2920 if (rep->status != (uint_t)REPLY_WAIT) { 2921 /* 2922 * Got error in reply message. Free 2923 * recv buffer here. 2924 */ 2925 ret = rep->status; 2926 rib_rbuf_free(conn, RECV_BUFFER, 2927 (caddr_t)(uintptr_t)rep->vaddr_cq); 2928 } 2929 } 2930 (void) rib_remreply(qp, rep); 2931 } else { 2932 /* 2933 * No matching reply structure found for given msgid on the 2934 * reply wait list. 2935 */ 2936 ret = RDMA_INVAL; 2937 #ifdef DEBUG 2938 cmn_err(CE_WARN, "rib_recv: no matching reply for " 2939 "xid %u, qp %p\n", msgid, (void *)qp); 2940 #endif 2941 } 2942 2943 /* 2944 * Done. 2945 */ 2946 mutex_exit(&qp->replylist_lock); 2947 return (ret); 2948 } 2949 2950 /* 2951 * RDMA write a buffer to the remote address. 
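 * Each clist element with a non-zero length is issued as its own
 * single-segment RDMA write to (c_daddr, rkey).  When 'wait' is set,
 * every write is posted signaled and waited for individually in
 * rib_sendwait().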
2952 */ 2953 rdma_stat 2954 rib_write(CONN *conn, struct clist *cl, int wait) 2955 { 2956 ibt_send_wr_t tx_wr; 2957 int cv_sig; 2958 ibt_wr_ds_t sgl[DSEG_MAX]; 2959 struct send_wid *wdesc; 2960 ibt_status_t ibt_status; 2961 rdma_stat ret = RDMA_SUCCESS; 2962 rib_qp_t *qp = ctoqp(conn); 2963 2964 if (cl == NULL) { 2965 cmn_err(CE_WARN, "rib_write: NULL clist\n"); 2966 return (RDMA_FAILED); 2967 } 2968 2969 2970 while ((cl != NULL)) { 2971 if(cl->c_len > 0){ 2972 bzero(&tx_wr, sizeof (ibt_send_wr_t)); 2973 tx_wr.wr.rc.rcwr.rdma.rdma_raddr = cl->c_daddr; 2974 tx_wr.wr.rc.rcwr.rdma.rdma_rkey = cl->c_dmemhandle.mrc_rmr; /* rkey */ 2975 sgl[0].ds_va = cl->c_saddr; 2976 sgl[0].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */ 2977 sgl[0].ds_len = cl->c_len; 2978 2979 if (wait) { 2980 tx_wr.wr_flags = IBT_WR_SEND_SIGNAL; 2981 cv_sig = 1; 2982 } else { 2983 tx_wr.wr_flags = IBT_WR_NO_FLAGS; 2984 cv_sig = 0; 2985 } 2986 2987 wdesc = rib_init_sendwait(0, cv_sig, qp); 2988 tx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc; 2989 tx_wr.wr_opcode = IBT_WRC_RDMAW; 2990 tx_wr.wr_trans = IBT_RC_SRV; 2991 tx_wr.wr_nds = 1; 2992 tx_wr.wr_sgl = sgl; 2993 2994 mutex_enter(&conn->c_lock); 2995 if (conn->c_state & C_CONNECTED) { 2996 ibt_status = ibt_post_send(qp->qp_hdl, &tx_wr, 1, NULL); 2997 } 2998 if (((conn->c_state & C_CONNECTED) == 0) || 2999 ibt_status != IBT_SUCCESS) { 3000 mutex_exit(&conn->c_lock); 3001 (void) rib_free_sendwait(wdesc); 3002 return (RDMA_FAILED); 3003 } 3004 mutex_exit(&conn->c_lock); 3005 3006 /* 3007 * Wait for send to complete 3008 */ 3009 if (wait) { 3010 ret = rib_sendwait(qp, wdesc); 3011 if (ret != 0) { 3012 return (ret); 3013 } 3014 } 3015 } 3016 cl = cl->c_next; 3017 } 3018 return (RDMA_SUCCESS); 3019 } 3020 3021 /* 3022 * RDMA Read a buffer from the remote address. 3023 */ 3024 rdma_stat 3025 rib_read(CONN *conn, struct clist *cl, int wait) 3026 { 3027 ibt_send_wr_t rx_wr; 3028 int nds; 3029 int cv_sig; 3030 ibt_wr_ds_t sgl[DSEG_MAX]; /* is 2 sufficient? */ 3031 struct send_wid *wdesc; 3032 ibt_status_t ibt_status = IBT_SUCCESS; 3033 rdma_stat ret = RDMA_SUCCESS; 3034 rib_qp_t *qp = ctoqp(conn); 3035 3036 if (cl == NULL) { 3037 cmn_err(CE_WARN, "rib_read: NULL clist\n"); 3038 return (RDMA_FAILED); 3039 } 3040 3041 bzero(&rx_wr, sizeof (ibt_send_wr_t)); 3042 /* 3043 * Remote address is at the head chunk item in list. 
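 * A single RDMA read is posted: the remote side is addressed by the
 * head chunk's c_saddr and rkey, and the data is scattered locally
 * into the c_daddr of up to DSEG_MAX chunks.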
3044 */ 3045 rx_wr.wr.rc.rcwr.rdma.rdma_raddr = cl->c_saddr; 3046 rx_wr.wr.rc.rcwr.rdma.rdma_rkey = cl->c_smemhandle.mrc_rmr; /* rkey */ 3047 3048 nds = 0; 3049 while (cl != NULL) { 3050 if (nds >= DSEG_MAX) { 3051 cmn_err(CE_WARN, "rib_read: DSEG_MAX too small!"); 3052 return (RDMA_FAILED); 3053 } 3054 sgl[nds].ds_va = cl->c_daddr; 3055 sgl[nds].ds_key = cl->c_dmemhandle.mrc_lmr; /* lkey */ 3056 sgl[nds].ds_len = cl->c_len; 3057 cl = cl->c_next; 3058 nds++; 3059 } 3060 3061 if (wait) { 3062 rx_wr.wr_flags = IBT_WR_SEND_SIGNAL; 3063 cv_sig = 1; 3064 } else { 3065 rx_wr.wr_flags = IBT_WR_NO_FLAGS; 3066 cv_sig = 0; 3067 } 3068 3069 wdesc = rib_init_sendwait(0, cv_sig, qp); 3070 rx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc; 3071 rx_wr.wr_opcode = IBT_WRC_RDMAR; 3072 rx_wr.wr_trans = IBT_RC_SRV; 3073 rx_wr.wr_nds = nds; 3074 rx_wr.wr_sgl = sgl; 3075 3076 mutex_enter(&conn->c_lock); 3077 if (conn->c_state & C_CONNECTED) { 3078 ibt_status = ibt_post_send(qp->qp_hdl, &rx_wr, 1, NULL); 3079 } 3080 if (((conn->c_state & C_CONNECTED) == 0) || 3081 ibt_status != IBT_SUCCESS) { 3082 mutex_exit(&conn->c_lock); 3083 #ifdef DEBUG 3084 if (rib_debug && ibt_status != IBT_SUCCESS) 3085 cmn_err(CE_WARN, "rib_read: FAILED post_sending RDMAR" 3086 " wr_id %llx on qp %p, status=%d", 3087 (longlong_t)rx_wr.wr_id, (void *)qp, 3088 ibt_status); 3089 #endif 3090 (void) rib_free_sendwait(wdesc); 3091 return (RDMA_FAILED); 3092 } 3093 mutex_exit(&conn->c_lock); 3094 3095 /* 3096 * Wait for send to complete 3097 */ 3098 if (wait) { 3099 ret = rib_sendwait(qp, wdesc); 3100 if (ret != 0) { 3101 return (ret); 3102 } 3103 } 3104 3105 return (RDMA_SUCCESS); 3106 } 3107 3108 int 3109 is_for_ipv4(ibt_ar_t *result) 3110 { 3111 int i, size = sizeof (struct in_addr); 3112 uint8_t zero = 0; 3113 3114 for (i = 0; i < (ATS_AR_DATA_LEN - size); i++) 3115 zero |= result->ar_data[i]; 3116 return (zero == 0); 3117 } 3118 3119 /* 3120 * rib_srv_cm_handler() 3121 * Connection Manager callback to handle RC connection requests. 3122 */ 3123 /* ARGSUSED */ 3124 static ibt_cm_status_t 3125 rib_srv_cm_handler(void *any, ibt_cm_event_t *event, 3126 ibt_cm_return_args_t *ret_args, void *priv_data, 3127 ibt_priv_data_len_t len) 3128 { 3129 queue_t *q; 3130 rib_qp_t *qp; 3131 rpcib_state_t *ribstat; 3132 rib_hca_t *hca; 3133 rdma_stat status = RDMA_SUCCESS; 3134 int i; 3135 struct clist cl; 3136 rdma_buf_t rdbuf = {0}; 3137 void *buf = NULL; 3138 ibt_cm_req_rcv_t cm_req_rcv; 3139 CONN *conn; 3140 ibt_status_t ibt_status; 3141 ibt_ar_t ar_query, ar_result; 3142 ib_gid_t sgid; 3143 3144 3145 ASSERT(any != NULL); 3146 ASSERT(event != NULL); 3147 3148 ribstat = (rpcib_state_t *)any; 3149 hca = (rib_hca_t *)ribstat->hca; 3150 ASSERT(hca != NULL); 3151 3152 /* got a connection request */ 3153 switch (event->cm_type) { 3154 case IBT_CM_EVENT_REQ_RCV: 3155 /* 3156 * If the plugin is in the NO_ACCEPT state, bail out. 3157 */ 3158 mutex_enter(&plugin_state_lock); 3159 if (plugin_state == NO_ACCEPT) { 3160 mutex_exit(&plugin_state_lock); 3161 return (IBT_CM_REJECT); 3162 } 3163 mutex_exit(&plugin_state_lock); 3164 3165 /* 3166 * Need to send a MRA MAD to CM so that it does not 3167 * timeout on us. 
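 * ibt_cm_delay() below stretches the REQ timeout to eight times the
 * requester's value while the server allocates the channel and
 * pre-posts its receive buffers.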
3168 */ 3169 (void) ibt_cm_delay(IBT_CM_DELAY_REQ, event->cm_session_id, 3170 event->cm_event.req.req_timeout * 8, NULL, 0); 3171 3172 mutex_enter(&rib_stat->open_hca_lock); 3173 q = rib_stat->q; 3174 mutex_exit(&rib_stat->open_hca_lock); 3175 status = rib_svc_create_chan(hca, (caddr_t)q, 3176 event->cm_event.req.req_prim_hca_port, &qp); 3177 if (status) { 3178 #ifdef DEBUG 3179 cmn_err(CE_WARN, "rib_srv_cm_handler: " 3180 "create_channel failed %d", status); 3181 #endif 3182 return (IBT_CM_REJECT); 3183 } 3184 cm_req_rcv = event->cm_event.req; 3185 3186 #ifdef DEBUG 3187 if (rib_debug > 2) { 3188 cmn_err(CE_NOTE, "rib_srv_cm_handler: " 3189 "server recv'ed IBT_CM_EVENT_REQ_RCV\n"); 3190 cmn_err(CE_NOTE, "\t\t SID:%llx\n", 3191 (longlong_t)cm_req_rcv.req_service_id); 3192 cmn_err(CE_NOTE, "\t\t Local Port:%d\n", 3193 cm_req_rcv.req_prim_hca_port); 3194 cmn_err(CE_NOTE, 3195 "\t\t Remote GID:(prefix:%llx,guid:%llx)\n", 3196 (longlong_t)cm_req_rcv.req_prim_addr.av_dgid.gid_prefix, 3197 (longlong_t)cm_req_rcv.req_prim_addr.av_dgid.gid_guid); 3198 cmn_err(CE_NOTE, "\t\t Local GID:(prefix:%llx,guid:%llx)\n", 3199 (longlong_t)cm_req_rcv.req_prim_addr.av_sgid.gid_prefix, 3200 (longlong_t)cm_req_rcv.req_prim_addr.av_sgid.gid_guid); 3201 cmn_err(CE_NOTE, "\t\t Remote QPN:%u\n", 3202 cm_req_rcv.req_remote_qpn); 3203 cmn_err(CE_NOTE, "\t\t Remote Q_Key:%x\n", 3204 cm_req_rcv.req_remote_qkey); 3205 cmn_err(CE_NOTE, "\t\t Local QP %p (qp_hdl=%p)\n", 3206 (void *)qp, (void *)qp->qp_hdl); 3207 } 3208 3209 if (rib_debug > 2) { 3210 ibt_rc_chan_query_attr_t chan_attrs; 3211 3212 if (ibt_query_rc_channel(qp->qp_hdl, &chan_attrs) 3213 == IBT_SUCCESS) { 3214 cmn_err(CE_NOTE, "rib_svc_cm_handler: qp %p in " 3215 "CEP state %d\n", (void *)qp, chan_attrs.rc_state); 3216 } 3217 } 3218 #endif 3219 3220 ret_args->cm_ret.rep.cm_channel = qp->qp_hdl; 3221 ret_args->cm_ret.rep.cm_rdma_ra_out = 4; 3222 ret_args->cm_ret.rep.cm_rdma_ra_in = 4; 3223 ret_args->cm_ret.rep.cm_rnr_retry_cnt = RNR_RETRIES; 3224 3225 /* 3226 * Pre-posts RECV buffers 3227 */ 3228 conn = qptoc(qp); 3229 for (i = 0; i < preposted_rbufs; i++) { 3230 bzero(&rdbuf, sizeof (rdbuf)); 3231 rdbuf.type = RECV_BUFFER; 3232 buf = rib_rbuf_alloc(conn, &rdbuf); 3233 if (buf == NULL) { 3234 cmn_err(CE_WARN, "rib_svc_cm_handler: " 3235 "No RECV_BUFFER buf!\n"); 3236 (void) rib_disconnect_channel(conn, NULL); 3237 return (IBT_CM_REJECT); 3238 } 3239 3240 bzero(&cl, sizeof (cl)); 3241 cl.c_saddr = (uintptr_t)rdbuf.addr; 3242 cl.c_len = rdbuf.len; 3243 cl.c_smemhandle.mrc_lmr = rdbuf.handle.mrc_lmr; /* lkey */ 3244 cl.c_next = NULL; 3245 status = rib_post_recv(conn, &cl); 3246 if (status != RDMA_SUCCESS) { 3247 cmn_err(CE_WARN, "rib_srv_cm_handler: failed " 3248 "posting RPC_REQ buf to qp %p!", (void *)qp); 3249 (void) rib_disconnect_channel(conn, NULL); 3250 return (IBT_CM_REJECT); 3251 } 3252 } 3253 (void) rib_add_connlist(conn, &hca->srv_conn_list); 3254 3255 /* 3256 * Get the address translation service record from ATS 3257 */ 3258 rw_enter(&hca->state_lock, RW_READER); 3259 if (hca->state == HCA_DETACHED) { 3260 rw_exit(&hca->state_lock); 3261 return (IBT_CM_REJECT); 3262 } 3263 rw_exit(&hca->state_lock); 3264 3265 for (i = 0; i < hca->hca_nports; i++) { 3266 ibt_status = ibt_get_port_state(hca->hca_hdl, i+1, 3267 &sgid, NULL); 3268 if (ibt_status != IBT_SUCCESS) { 3269 if (rib_debug) { 3270 cmn_err(CE_WARN, "rib_srv_cm_handler: " 3271 "ibt_get_port_state FAILED!" 
3272 "status = %d\n", ibt_status); 3273 } 3274 } else { 3275 /* 3276 * do ibt_query_ar() 3277 */ 3278 bzero(&ar_query, sizeof (ar_query)); 3279 bzero(&ar_result, sizeof (ar_result)); 3280 ar_query.ar_gid = cm_req_rcv.req_prim_addr.av_dgid; 3281 ar_query.ar_pkey = event->cm_event.req.req_pkey; 3282 ibt_status = ibt_query_ar(&sgid, &ar_query, 3283 &ar_result); 3284 if (ibt_status != IBT_SUCCESS) { 3285 if (rib_debug) { 3286 cmn_err(CE_WARN, "rib_srv_cm_handler: " 3287 "ibt_query_ar FAILED!" 3288 "status = %d\n", ibt_status); 3289 } 3290 } else { 3291 conn = qptoc(qp); 3292 3293 if (is_for_ipv4(&ar_result)) { 3294 struct sockaddr_in *s; 3295 int sin_size = sizeof (struct sockaddr_in); 3296 int in_size = sizeof (struct in_addr); 3297 uint8_t *start_pos; 3298 3299 conn->c_raddr.maxlen = 3300 conn->c_raddr.len = sin_size; 3301 conn->c_raddr.buf = kmem_zalloc(sin_size, 3302 KM_SLEEP); 3303 s = (struct sockaddr_in *)conn->c_raddr.buf; 3304 s->sin_family = AF_INET; 3305 /* 3306 * For IPv4, the IP addr is stored in 3307 * the last four bytes of ar_data. 3308 */ 3309 start_pos = ar_result.ar_data + 3310 ATS_AR_DATA_LEN - in_size; 3311 bcopy(start_pos, &s->sin_addr, in_size); 3312 if (rib_debug > 1) { 3313 char print_addr[INET_ADDRSTRLEN]; 3314 3315 bzero(print_addr, INET_ADDRSTRLEN); 3316 (void) inet_ntop(AF_INET, &s->sin_addr, 3317 print_addr, INET_ADDRSTRLEN); 3318 cmn_err(CE_NOTE, "rib_srv_cm_handler: " 3319 "remote clnt_addr: %s\n", print_addr); 3320 } 3321 } else { 3322 struct sockaddr_in6 *s6; 3323 int sin6_size = sizeof (struct sockaddr_in6); 3324 3325 conn->c_raddr.maxlen = 3326 conn->c_raddr.len = sin6_size; 3327 conn->c_raddr.buf = kmem_zalloc(sin6_size, 3328 KM_SLEEP); 3329 3330 s6 = (struct sockaddr_in6 *)conn->c_raddr.buf; 3331 s6->sin6_family = AF_INET6; 3332 /* sin6_addr is stored in ar_data */ 3333 bcopy(ar_result.ar_data, &s6->sin6_addr, 3334 sizeof (struct in6_addr)); 3335 if (rib_debug > 1) { 3336 char print_addr[INET6_ADDRSTRLEN]; 3337 3338 bzero(print_addr, INET6_ADDRSTRLEN); 3339 (void) inet_ntop(AF_INET6, &s6->sin6_addr, 3340 print_addr, INET6_ADDRSTRLEN); 3341 cmn_err(CE_NOTE, "rib_srv_cm_handler: " 3342 "remote clnt_addr: %s\n", print_addr); 3343 } 3344 } 3345 return (IBT_CM_ACCEPT); 3346 } 3347 } 3348 } 3349 if (rib_debug > 1) { 3350 cmn_err(CE_WARN, "rib_srv_cm_handler: " 3351 "address record query failed!"); 3352 } 3353 break; 3354 3355 case IBT_CM_EVENT_CONN_CLOSED: 3356 { 3357 CONN *conn; 3358 rib_qp_t *qp; 3359 3360 switch (event->cm_event.closed) { 3361 case IBT_CM_CLOSED_DREP_RCVD: 3362 case IBT_CM_CLOSED_DREQ_TIMEOUT: 3363 case IBT_CM_CLOSED_DUP: 3364 case IBT_CM_CLOSED_ABORT: 3365 case IBT_CM_CLOSED_ALREADY: 3366 /* 3367 * These cases indicate the local end initiated 3368 * the closing of the channel. Nothing to do here. 3369 */ 3370 break; 3371 default: 3372 /* 3373 * Reason for CONN_CLOSED event must be one of 3374 * IBT_CM_CLOSED_DREQ_RCVD or IBT_CM_CLOSED_REJ_RCVD 3375 * or IBT_CM_CLOSED_STALE. These indicate cases were 3376 * the remote end is closing the channel. In these 3377 * cases free the channel and transition to error 3378 * state 3379 */ 3380 qp = ibt_get_chan_private(event->cm_channel); 3381 conn = qptoc(qp); 3382 mutex_enter(&conn->c_lock); 3383 if (conn->c_state == C_DISCONN_PEND) { 3384 mutex_exit(&conn->c_lock); 3385 break; 3386 } 3387 conn->c_state = C_ERROR; 3388 3389 /* 3390 * Free the rc_channel. Channel has already 3391 * transitioned to ERROR state and WRs have been 3392 * FLUSHED_ERR already. 
3393 */ 3394 (void) ibt_free_channel(qp->qp_hdl); 3395 qp->qp_hdl = NULL; 3396 3397 /* 3398 * Free the conn if c_ref goes down to 0 3399 */ 3400 if (conn->c_ref == 0) { 3401 /* 3402 * Remove from list and free conn 3403 */ 3404 conn->c_state = C_DISCONN_PEND; 3405 mutex_exit(&conn->c_lock); 3406 (void) rib_disconnect_channel(conn, 3407 &hca->srv_conn_list); 3408 } else { 3409 mutex_exit(&conn->c_lock); 3410 } 3411 #ifdef DEBUG 3412 if (rib_debug) 3413 cmn_err(CE_NOTE, "rib_srv_cm_handler: " 3414 " (CONN_CLOSED) channel disconnected"); 3415 #endif 3416 break; 3417 } 3418 break; 3419 } 3420 case IBT_CM_EVENT_CONN_EST: 3421 /* 3422 * RTU received, hence connection established. 3423 */ 3424 if (rib_debug > 1) 3425 cmn_err(CE_NOTE, "rib_srv_cm_handler: " 3426 "(CONN_EST) channel established"); 3427 break; 3428 3429 default: 3430 if (rib_debug > 2) { 3431 /* Let CM handle the following events. */ 3432 if (event->cm_type == IBT_CM_EVENT_REP_RCV) { 3433 cmn_err(CE_NOTE, "rib_srv_cm_handler: " 3434 "server recv'ed IBT_CM_EVENT_REP_RCV\n"); 3435 } else if (event->cm_type == IBT_CM_EVENT_LAP_RCV) { 3436 cmn_err(CE_NOTE, "rib_srv_cm_handler: " 3437 "server recv'ed IBT_CM_EVENT_LAP_RCV\n"); 3438 } else if (event->cm_type == IBT_CM_EVENT_MRA_RCV) { 3439 cmn_err(CE_NOTE, "rib_srv_cm_handler: " 3440 "server recv'ed IBT_CM_EVENT_MRA_RCV\n"); 3441 } else if (event->cm_type == IBT_CM_EVENT_APR_RCV) { 3442 cmn_err(CE_NOTE, "rib_srv_cm_handler: " 3443 "server recv'ed IBT_CM_EVENT_APR_RCV\n"); 3444 } else if (event->cm_type == IBT_CM_EVENT_FAILURE) { 3445 cmn_err(CE_NOTE, "rib_srv_cm_handler: " 3446 "server recv'ed IBT_CM_EVENT_FAILURE\n"); 3447 } 3448 } 3449 return (IBT_CM_REJECT); 3450 } 3451 3452 /* accept all other CM messages (i.e. let the CM handle them) */ 3453 return (IBT_CM_ACCEPT); 3454 } 3455 3456 static rdma_stat 3457 rib_register_ats(rib_hca_t *hca) 3458 { 3459 ibt_hca_portinfo_t *port_infop; 3460 uint_t port_size; 3461 uint_t pki, i, num_ports, nbinds; 3462 ibt_status_t ibt_status; 3463 rib_service_t *new_service, *temp_srv; 3464 rpcib_ats_t *atsp; 3465 rpcib_ibd_insts_t ibds; 3466 ib_pkey_t pkey; 3467 ibt_ar_t ar; /* address record */ 3468 3469 /* 3470 * Query all ports for the given HCA 3471 */ 3472 rw_enter(&hca->state_lock, RW_READER); 3473 if (hca->state != HCA_DETACHED) { 3474 ibt_status = ibt_query_hca_ports(hca->hca_hdl, 0, &port_infop, 3475 &num_ports, &port_size); 3476 rw_exit(&hca->state_lock); 3477 } else { 3478 rw_exit(&hca->state_lock); 3479 return (RDMA_FAILED); 3480 } 3481 if (ibt_status != IBT_SUCCESS) { 3482 #ifdef DEBUG 3483 if (rib_debug) { 3484 cmn_err(CE_NOTE, "rib_register_ats: FAILED in " 3485 "ibt_query_hca_ports, status = %d\n", ibt_status); 3486 } 3487 #endif 3488 return (RDMA_FAILED); 3489 } 3490 3491 #ifdef DEBUG 3492 if (rib_debug > 1) { 3493 cmn_err(CE_NOTE, "rib_register_ats: Ports detected " 3494 "%d\n", num_ports); 3495 3496 for (i = 0; i < num_ports; i++) { 3497 if (port_infop[i].p_linkstate != IBT_PORT_ACTIVE) { 3498 cmn_err(CE_WARN, "rib_register_ats " 3499 "Port #: %d INACTIVE\n", i+1); 3500 } else if (port_infop[i].p_linkstate == 3501 IBT_PORT_ACTIVE) { 3502 cmn_err(CE_NOTE, "rib_register_ats " 3503 "Port #: %d ACTIVE\n", i+1); 3504 } 3505 } 3506 } 3507 #endif 3508 3509 ibds.rib_ibd_alloc = N_IBD_INSTANCES; 3510 ibds.rib_ibd_cnt = 0; 3511 ibds.rib_ats = (rpcib_ats_t *)kmem_zalloc(ibds.rib_ibd_alloc * 3512 sizeof (rpcib_ats_t), KM_SLEEP); 3513 rib_get_ibd_insts(&ibds); 3514 3515 if (ibds.rib_ibd_cnt == 0) { 3516 kmem_free(ibds.rib_ats, ibds.rib_ibd_alloc * 
3517 sizeof (rpcib_ats_t)); 3518 ibt_free_portinfo(port_infop, port_size); 3519 return (RDMA_FAILED); 3520 } 3521 3522 /* 3523 * Get the IP addresses of active ports and 3524 * register them with ATS. IPv4 addresses 3525 * have precedence over IPv6 addresses. 3526 */ 3527 if (get_ibd_ipaddr(&ibds) != 0) { 3528 #ifdef DEBUG 3529 if (rib_debug > 1) { 3530 cmn_err(CE_WARN, "rib_register_ats: " 3531 "get_ibd_ipaddr failed"); 3532 } 3533 #endif 3534 kmem_free(ibds.rib_ats, ibds.rib_ibd_alloc * 3535 sizeof (rpcib_ats_t)); 3536 ibt_free_portinfo(port_infop, port_size); 3537 return (RDMA_FAILED); 3538 } 3539 3540 /* 3541 * Start ATS registration for active ports on this HCA. 3542 */ 3543 rw_enter(&hca->service_list_lock, RW_WRITER); 3544 nbinds = 0; 3545 new_service = NULL; 3546 for (i = 0; i < num_ports; i++) { 3547 if (port_infop[i].p_linkstate != IBT_PORT_ACTIVE) 3548 continue; 3549 3550 for (pki = 0; pki < port_infop[i].p_pkey_tbl_sz; pki++) { 3551 pkey = port_infop[i].p_pkey_tbl[pki]; 3552 if ((pkey & IBSRM_HB) && (pkey != IB_PKEY_INVALID_FULL)) { 3553 ar.ar_gid = port_infop[i].p_sgid_tbl[0]; 3554 ar.ar_pkey = pkey; 3555 atsp = get_ibd_entry(&ar.ar_gid, pkey, &ibds); 3556 if (atsp == NULL) 3557 continue; 3558 /* 3559 * store the sin[6]_addr in ar_data 3560 */ 3561 (void) bzero(ar.ar_data, ATS_AR_DATA_LEN); 3562 if (atsp->ras_inet_type == AF_INET) { 3563 uint8_t *start_pos; 3564 3565 /* 3566 * The ipv4 addr goes into the last 3567 * four bytes of ar_data. 3568 */ 3569 start_pos = ar.ar_data + ATS_AR_DATA_LEN - 3570 sizeof (struct in_addr); 3571 bcopy(&atsp->ras_sin.sin_addr, start_pos, 3572 sizeof (struct in_addr)); 3573 } else if (atsp->ras_inet_type == AF_INET6) { 3574 bcopy(&atsp->ras_sin6.sin6_addr, ar.ar_data, 3575 sizeof (struct in6_addr)); 3576 } else 3577 continue; 3578 3579 ibt_status = ibt_register_ar(hca->ibt_clnt_hdl, &ar); 3580 if (ibt_status == IBT_SUCCESS) { 3581 #ifdef DEBUG 3582 if (rib_debug > 1) { 3583 cmn_err(CE_WARN, "rib_register_ats: " 3584 "ibt_register_ar OK on port %d", i+1); 3585 } 3586 #endif 3587 /* 3588 * Allocate and prepare a service entry 3589 */ 3590 new_service = kmem_zalloc(sizeof (rib_service_t), 3591 KM_SLEEP); 3592 new_service->srv_port = i + 1; 3593 new_service->srv_ar = ar; 3594 new_service->srv_next = NULL; 3595 3596 /* 3597 * Add to the service list for this HCA 3598 */ 3599 new_service->srv_next = hca->ats_list; 3600 hca->ats_list = new_service; 3601 new_service = NULL; 3602 nbinds ++; 3603 } else { 3604 #ifdef DEBUG 3605 if (rib_debug > 1) { 3606 cmn_err(CE_WARN, "rib_register_ats: " 3607 "ibt_register_ar FAILED on port %d", i+1); 3608 } 3609 #endif 3610 } 3611 } 3612 } 3613 } 3614 3615 #ifdef DEBUG 3616 if (rib_debug > 1) { 3617 for (temp_srv = hca->ats_list; temp_srv != NULL; 3618 temp_srv = temp_srv->srv_next) { 3619 cmn_err(CE_NOTE, "Service: ATS, active on" 3620 " port: %d\n", temp_srv->srv_port); 3621 } 3622 } 3623 #endif 3624 3625 rw_exit(&hca->service_list_lock); 3626 kmem_free(ibds.rib_ats, ibds.rib_ibd_alloc * sizeof (rpcib_ats_t)); 3627 ibt_free_portinfo(port_infop, port_size); 3628 3629 if (nbinds == 0) { 3630 #ifdef DEBUG 3631 if (rib_debug > 1) { 3632 cmn_err(CE_WARN, "rib_register_ats FAILED!\n"); 3633 } 3634 #endif 3635 return (RDMA_FAILED); 3636 } 3637 return (RDMA_SUCCESS); 3638 } 3639 3640 static rdma_stat 3641 rib_register_service(rib_hca_t *hca, int service_type) 3642 { 3643 ibt_srv_desc_t sdesc; 3644 ibt_srv_bind_t sbind; 3645 ibt_hca_portinfo_t *port_infop; 3646 ib_svc_id_t srv_id; 3647 ibt_srv_hdl_t srv_hdl; 3648 uint_t 
port_size; 3649 uint_t pki, i, j, num_ports, nbinds; 3650 ibt_status_t ibt_status; 3651 char **addrs; 3652 int addr_count; 3653 rib_service_t *new_service, *temp_srv; 3654 ib_pkey_t pkey; 3655 3656 /* 3657 * Query all ports for the given HCA 3658 */ 3659 rw_enter(&hca->state_lock, RW_READER); 3660 if (hca->state != HCA_DETACHED) { 3661 ibt_status = ibt_query_hca_ports(hca->hca_hdl, 0, &port_infop, 3662 &num_ports, &port_size); 3663 rw_exit(&hca->state_lock); 3664 } else { 3665 rw_exit(&hca->state_lock); 3666 return (RDMA_FAILED); 3667 } 3668 if (ibt_status != IBT_SUCCESS) { 3669 #ifdef DEBUG 3670 cmn_err(CE_NOTE, "rib_register_service: FAILED in " 3671 "ibt_query_hca_ports, status = %d\n", ibt_status); 3672 #endif 3673 return (RDMA_FAILED); 3674 } 3675 3676 #ifdef DEBUG 3677 if (rib_debug > 1) { 3678 cmn_err(CE_NOTE, "rib_register_service: Ports detected " 3679 "%d\n", num_ports); 3680 3681 for (i = 0; i < num_ports; i++) { 3682 if (port_infop[i].p_linkstate != IBT_PORT_ACTIVE) { 3683 cmn_err(CE_WARN, "rib_register_service " 3684 "Port #: %d INACTIVE\n", i+1); 3685 } else if (port_infop[i].p_linkstate == 3686 IBT_PORT_ACTIVE) { 3687 cmn_err(CE_NOTE, "rib_register_service " 3688 "Port #: %d ACTIVE\n", i+1); 3689 } 3690 } 3691 } 3692 #endif 3693 /* 3694 * Get all the IP addresses on this system to register the 3695 * given "service type" on all DNS recognized IP addrs. 3696 * Each service type such as NFS will have all the systems 3697 * IP addresses as its different names. For now the only 3698 * type of service we support in RPCIB is NFS. 3699 */ 3700 addrs = get_ip_addrs(&addr_count); 3701 if (addrs == NULL) { 3702 #ifdef DEBUG 3703 if (rib_debug) { 3704 cmn_err(CE_WARN, "rib_register_service: " 3705 "get_ip_addrs failed\n"); 3706 } 3707 #endif 3708 ibt_free_portinfo(port_infop, port_size); 3709 return (RDMA_FAILED); 3710 } 3711 3712 #ifdef DEBUG 3713 if (rib_debug > 1) { 3714 for (i = 0; i < addr_count; i++) 3715 cmn_err(CE_NOTE, "addr %d: %s\n", i, addrs[i]); 3716 } 3717 #endif 3718 3719 rw_enter(&hca->service_list_lock, RW_WRITER); 3720 /* 3721 * Start registering and binding service to active 3722 * on active ports on this HCA. 3723 */ 3724 nbinds = 0; 3725 new_service = NULL; 3726 3727 /* 3728 * We use IP addresses as the service names for 3729 * service registration. Register each of them 3730 * with CM to obtain a svc_id and svc_hdl. We do not 3731 * register the service with machine's loopback address. 3732 */ 3733 for (j = 1; j < addr_count; j++) { 3734 (void) bzero(&srv_id, sizeof (ib_svc_id_t)); 3735 (void) bzero(&srv_hdl, sizeof (ibt_srv_hdl_t)); 3736 (void) bzero(&sdesc, sizeof (ibt_srv_desc_t)); 3737 3738 sdesc.sd_handler = rib_srv_cm_handler; 3739 sdesc.sd_flags = 0; 3740 3741 ibt_status = ibt_register_service(hca->ibt_clnt_hdl, 3742 &sdesc, 0, 1, &srv_hdl, &srv_id); 3743 if (ibt_status != IBT_SUCCESS) { 3744 #ifdef DEBUG 3745 if (rib_debug) { 3746 cmn_err(CE_WARN, "rib_register_service: " 3747 "ibt_register_service FAILED, status " 3748 "= %d\n", ibt_status); 3749 } 3750 #endif 3751 /* 3752 * No need to go on, since we failed to obtain 3753 * a srv_id and srv_hdl. Move on to the next 3754 * IP addr as a service name. 
3755 */ 3756 continue; 3757 } 3758 for (i = 0; i < num_ports; i++) { 3759 if (port_infop[i].p_linkstate != IBT_PORT_ACTIVE) 3760 continue; 3761 3762 for (pki = 0; pki < port_infop[i].p_pkey_tbl_sz; pki++) { 3763 pkey = port_infop[i].p_pkey_tbl[pki]; 3764 if ((pkey & IBSRM_HB) && (pkey != IB_PKEY_INVALID_FULL)) { 3765 3766 /* 3767 * Allocate and prepare a service entry 3768 */ 3769 new_service = kmem_zalloc(1 * sizeof (rib_service_t), 3770 KM_SLEEP); 3771 new_service->srv_type = service_type; 3772 new_service->srv_port = i + 1; 3773 new_service->srv_id = srv_id; 3774 new_service->srv_hdl = srv_hdl; 3775 new_service->srv_sbind_hdl = kmem_zalloc(1 * 3776 sizeof (ibt_sbind_hdl_t), KM_SLEEP); 3777 3778 new_service->srv_name = kmem_zalloc(IB_SVC_NAME_LEN, 3779 KM_SLEEP); 3780 (void) bcopy(addrs[j], new_service->srv_name, 3781 IB_SVC_NAME_LEN); 3782 (void) strlcat(new_service->srv_name, "::NFS", 3783 IB_SVC_NAME_LEN); 3784 new_service->srv_next = NULL; 3785 3786 /* 3787 * Bind the service, specified by the IP address, 3788 * to the port/pkey using the srv_hdl returned 3789 * from ibt_register_service(). 3790 */ 3791 (void) bzero(&sbind, sizeof (ibt_srv_bind_t)); 3792 sbind.sb_pkey = pkey; 3793 sbind.sb_lease = 0xFFFFFFFF; 3794 sbind.sb_key[0] = NFS_SEC_KEY0; 3795 sbind.sb_key[1] = NFS_SEC_KEY1; 3796 sbind.sb_name = new_service->srv_name; 3797 3798 #ifdef DEBUG 3799 if (rib_debug > 1) { 3800 cmn_err(CE_NOTE, "rib_register_service: " 3801 "binding service using name: %s\n", 3802 sbind.sb_name); 3803 } 3804 #endif 3805 ibt_status = ibt_bind_service(srv_hdl, 3806 port_infop[i].p_sgid_tbl[0], &sbind, rib_stat, 3807 new_service->srv_sbind_hdl); 3808 if (ibt_status != IBT_SUCCESS) { 3809 #ifdef DEBUG 3810 if (rib_debug) { 3811 cmn_err(CE_WARN, "rib_register_service: FAILED" 3812 " in ibt_bind_service, status = %d\n", 3813 ibt_status); 3814 } 3815 #endif 3816 kmem_free(new_service->srv_sbind_hdl, 3817 sizeof (ibt_sbind_hdl_t)); 3818 kmem_free(new_service->srv_name, 3819 IB_SVC_NAME_LEN); 3820 kmem_free(new_service, 3821 sizeof (rib_service_t)); 3822 new_service = NULL; 3823 continue; 3824 } 3825 #ifdef DEBUG 3826 if (rib_debug > 1) { 3827 if (ibt_status == IBT_SUCCESS) 3828 cmn_err(CE_NOTE, "rib_regstr_service: " 3829 "Serv: %s REGISTERED on port: %d", 3830 sbind.sb_name, i+1); 3831 } 3832 #endif 3833 /* 3834 * Add to the service list for this HCA 3835 */ 3836 new_service->srv_next = hca->service_list; 3837 hca->service_list = new_service; 3838 new_service = NULL; 3839 nbinds ++; 3840 } 3841 } 3842 } 3843 } 3844 rw_exit(&hca->service_list_lock); 3845 3846 #ifdef DEBUG 3847 if (rib_debug > 1) { 3848 /* 3849 * Change this print to a more generic one, as rpcib 3850 * is supposed to handle multiple service types. 3851 */ 3852 for (temp_srv = hca->service_list; temp_srv != NULL; 3853 temp_srv = temp_srv->srv_next) { 3854 cmn_err(CE_NOTE, "NFS-IB, active on port:" 3855 " %d\n" 3856 "Using name: %s", temp_srv->srv_port, 3857 temp_srv->srv_name); 3858 } 3859 } 3860 #endif 3861 3862 ibt_free_portinfo(port_infop, port_size); 3863 for (i = 0; i < addr_count; i++) { 3864 if (addrs[i]) 3865 kmem_free(addrs[i], IB_SVC_NAME_LEN); 3866 } 3867 kmem_free(addrs, addr_count * sizeof (char *)); 3868 3869 if (nbinds == 0) { 3870 #ifdef DEBUG 3871 if (rib_debug) { 3872 cmn_err(CE_WARN, "rib_register_service: " 3873 "bind_service FAILED!\n"); 3874 } 3875 #endif 3876 return (RDMA_FAILED); 3877 } else { 3878 /* 3879 * Put this plugin into accept state, since atleast 3880 * one registration was successful. 
3881 */ 3882 mutex_enter(&plugin_state_lock); 3883 plugin_state = ACCEPT; 3884 mutex_exit(&plugin_state_lock); 3885 return (RDMA_SUCCESS); 3886 } 3887 } 3888 3889 void 3890 rib_listen(struct rdma_svc_data *rd) 3891 { 3892 rdma_stat status = RDMA_SUCCESS; 3893 3894 rd->active = 0; 3895 rd->err_code = RDMA_FAILED; 3896 3897 /* 3898 * First check if a hca is still attached 3899 */ 3900 rw_enter(&rib_stat->hca->state_lock, RW_READER); 3901 if (rib_stat->hca->state != HCA_INITED) { 3902 rw_exit(&rib_stat->hca->state_lock); 3903 return; 3904 } 3905 rw_exit(&rib_stat->hca->state_lock); 3906 3907 rib_stat->q = &rd->q; 3908 /* 3909 * Register the Address translation service 3910 */ 3911 mutex_enter(&rib_stat->open_hca_lock); 3912 if (ats_running == 0) { 3913 if (rib_register_ats(rib_stat->hca) != RDMA_SUCCESS) { 3914 #ifdef DEBUG 3915 if (rib_debug) { 3916 cmn_err(CE_WARN, 3917 "rib_listen(): ats registration failed!"); 3918 } 3919 #endif 3920 mutex_exit(&rib_stat->open_hca_lock); 3921 return; 3922 } else { 3923 ats_running = 1; 3924 } 3925 } 3926 mutex_exit(&rib_stat->open_hca_lock); 3927 3928 /* 3929 * Right now the only service type is NFS. Hence force feed this 3930 * value. Ideally to communicate the service type it should be 3931 * passed down in rdma_svc_data. 3932 */ 3933 rib_stat->service_type = NFS; 3934 status = rib_register_service(rib_stat->hca, NFS); 3935 if (status != RDMA_SUCCESS) { 3936 rd->err_code = status; 3937 return; 3938 } 3939 /* 3940 * Service active on an HCA, check rd->err_code for more 3941 * explainable errors. 3942 */ 3943 rd->active = 1; 3944 rd->err_code = status; 3945 } 3946 3947 /* XXXX */ 3948 /* ARGSUSED */ 3949 static void 3950 rib_listen_stop(struct rdma_svc_data *svcdata) 3951 { 3952 rib_hca_t *hca; 3953 3954 /* 3955 * KRPC called the RDMATF to stop the listeners, this means 3956 * stop sending incomming or recieved requests to KRPC master 3957 * transport handle for RDMA-IB. This is also means that the 3958 * master transport handle, responsible for us, is going away. 3959 */ 3960 mutex_enter(&plugin_state_lock); 3961 plugin_state = NO_ACCEPT; 3962 if (svcdata != NULL) 3963 svcdata->active = 0; 3964 mutex_exit(&plugin_state_lock); 3965 3966 /* 3967 * First check if a hca is still attached 3968 */ 3969 hca = rib_stat->hca; 3970 rw_enter(&hca->state_lock, RW_READER); 3971 if (hca->state != HCA_INITED) { 3972 rw_exit(&hca->state_lock); 3973 return; 3974 } 3975 rib_stop_services(hca); 3976 rw_exit(&hca->state_lock); 3977 } 3978 3979 /* 3980 * Traverse the HCA's service list to unbind and deregister services. 3981 * Instead of unbinding the service for a service handle by 3982 * calling ibt_unbind_service() for each port/pkey, we unbind 3983 * all the services for the service handle by making only one 3984 * call to ibt_unbind_all_services(). Then, we deregister the 3985 * service for the service handle. 3986 * 3987 * When traversing the entries in service_list, we compare the 3988 * srv_hdl of the current entry with that of the next. If they 3989 * are different or if the next entry is NULL, the current entry 3990 * marks the last binding of the service handle. In this case, 3991 * call ibt_unbind_all_services() and deregister the service for 3992 * the service handle. If they are the same, the current and the 3993 * next entries are bound to the same service handle. In this 3994 * case, move on to the next entry. 
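 * For example, if service_list holds A1 -> A2 -> B1, where A1 and A2
 * share a service handle and B1 has a different one, the
 * unbind/deregister pair runs once when A2 is visited (its next entry
 * has a different handle) and once more when B1 is visited (its next
 * entry is NULL).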
3995 */ 3996 static void 3997 rib_stop_services(rib_hca_t *hca) 3998 { 3999 rib_service_t *srv_list, *to_remove; 4000 ibt_status_t ibt_status; 4001 4002 /* 4003 * unbind and deregister the services for this service type. 4004 * Right now there is only one service type. In future it will 4005 * be passed down to this function. 4006 */ 4007 rw_enter(&hca->service_list_lock, RW_WRITER); 4008 srv_list = hca->service_list; 4009 while (srv_list != NULL) { 4010 to_remove = srv_list; 4011 srv_list = to_remove->srv_next; 4012 if (srv_list == NULL || bcmp(to_remove->srv_hdl, 4013 srv_list->srv_hdl, sizeof (ibt_srv_hdl_t))) { 4014 4015 ibt_status = ibt_unbind_all_services(to_remove->srv_hdl); 4016 if (ibt_status != IBT_SUCCESS) { 4017 cmn_err(CE_WARN, "rib_listen_stop: " 4018 "ibt_unbind_all_services FAILED" 4019 " status: %d\n", ibt_status); 4020 } 4021 4022 ibt_status = 4023 ibt_deregister_service(hca->ibt_clnt_hdl, 4024 to_remove->srv_hdl); 4025 if (ibt_status != IBT_SUCCESS) { 4026 cmn_err(CE_WARN, "rib_listen_stop: " 4027 "ibt_deregister_service FAILED" 4028 " status: %d\n", ibt_status); 4029 } 4030 4031 #ifdef DEBUG 4032 if (rib_debug > 1) { 4033 if (ibt_status == IBT_SUCCESS) 4034 cmn_err(CE_NOTE, "rib_listen_stop: " 4035 "Successfully stopped and" 4036 " UNREGISTERED service: %s\n", 4037 to_remove->srv_name); 4038 } 4039 #endif 4040 } 4041 kmem_free(to_remove->srv_name, IB_SVC_NAME_LEN); 4042 kmem_free(to_remove->srv_sbind_hdl, 4043 sizeof (ibt_sbind_hdl_t)); 4044 4045 kmem_free(to_remove, sizeof (rib_service_t)); 4046 } 4047 hca->service_list = NULL; 4048 rw_exit(&hca->service_list_lock); 4049 } 4050 4051 static struct svc_recv * 4052 rib_init_svc_recv(rib_qp_t *qp, ibt_wr_ds_t *sgl) 4053 { 4054 struct svc_recv *recvp; 4055 4056 recvp = kmem_zalloc(sizeof (struct svc_recv), KM_SLEEP); 4057 recvp->vaddr = sgl->ds_va; 4058 recvp->qp = qp; 4059 recvp->bytes_xfer = 0; 4060 return (recvp); 4061 } 4062 4063 static int 4064 rib_free_svc_recv(struct svc_recv *recvp) 4065 { 4066 kmem_free(recvp, sizeof (*recvp)); 4067 4068 return (0); 4069 } 4070 4071 static struct reply * 4072 rib_addreplylist(rib_qp_t *qp, uint32_t msgid) 4073 { 4074 struct reply *rep; 4075 4076 4077 rep = kmem_zalloc(sizeof (struct reply), KM_NOSLEEP); 4078 if (rep == NULL) { 4079 mutex_exit(&qp->replylist_lock); 4080 cmn_err(CE_WARN, "rib_addreplylist: no memory\n"); 4081 return (NULL); 4082 } 4083 rep->xid = msgid; 4084 rep->vaddr_cq = NULL; 4085 rep->bytes_xfer = 0; 4086 rep->status = (uint_t)REPLY_WAIT; 4087 rep->prev = NULL; 4088 cv_init(&rep->wait_cv, NULL, CV_DEFAULT, NULL); 4089 4090 mutex_enter(&qp->replylist_lock); 4091 if (qp->replylist) { 4092 rep->next = qp->replylist; 4093 qp->replylist->prev = rep; 4094 } 4095 qp->rep_list_size++; 4096 if (rib_debug > 1) 4097 cmn_err(CE_NOTE, "rib_addreplylist: qp:%p, rep_list_size:%d\n", 4098 (void *)qp, qp->rep_list_size); 4099 qp->replylist = rep; 4100 mutex_exit(&qp->replylist_lock); 4101 4102 return (rep); 4103 } 4104 4105 static rdma_stat 4106 rib_rem_replylist(rib_qp_t *qp) 4107 { 4108 struct reply *r, *n; 4109 4110 mutex_enter(&qp->replylist_lock); 4111 for (r = qp->replylist; r != NULL; r = n) { 4112 n = r->next; 4113 (void) rib_remreply(qp, r); 4114 } 4115 mutex_exit(&qp->replylist_lock); 4116 4117 return (RDMA_SUCCESS); 4118 } 4119 4120 static int 4121 rib_remreply(rib_qp_t *qp, struct reply *rep) 4122 { 4123 4124 ASSERT(MUTEX_HELD(&qp->replylist_lock)); 4125 if (rep->prev) { 4126 rep->prev->next = rep->next; 4127 } 4128 if (rep->next) { 4129 rep->next->prev = rep->prev; 
4130 } 4131 if (qp->replylist == rep) 4132 qp->replylist = rep->next; 4133 4134 cv_destroy(&rep->wait_cv); 4135 qp->rep_list_size--; 4136 if (rib_debug > 1) 4137 cmn_err(CE_NOTE, "rib_remreply: qp:%p, rep_list_size:%d\n", 4138 (void *)qp, qp->rep_list_size); 4139 4140 kmem_free(rep, sizeof (*rep)); 4141 4142 return (0); 4143 } 4144 4145 rdma_stat 4146 rib_registermem(CONN *conn, caddr_t adsp, caddr_t buf, uint_t buflen, 4147 struct mrc *buf_handle) 4148 { 4149 ibt_mr_hdl_t mr_hdl = NULL; /* memory region handle */ 4150 #ifdef IB_FMR_SUP 4151 ibt_pmr_desc_t pmr_desc; /* vaddr, lkey, rkey */ 4152 ibt_ma_hdl_t ma_hdl = NULL; 4153 #endif 4154 ibt_mr_desc_t mr_desc; /* vaddr, lkey, rkey */ 4155 rdma_stat status; 4156 rib_hca_t *hca = (ctoqp(conn))->hca; 4157 4158 /* 4159 * Note: ALL buffer pools use the same memory type RDMARW. 4160 */ 4161 #ifdef IB_FMR_SUP 4162 status = rib_reg_mem_fmr(hca, adsp, buf, buflen, 0, &mr_hdl, &ma_hdl, 4163 &pmr_desc); 4164 if (status == RDMA_SUCCESS) { 4165 buf_handle->mrc_linfo = (uintptr_t)mr_hdl; 4166 buf_handle->mrc_lmr = (uint32_t)pmr_desc.pmd_lkey; 4167 buf_handle->mrc_rmr = (uint32_t)pmr_desc.pmd_rkey; 4168 buf_handle->mrc_lma = (uintptr_t)ma_hdl; 4169 goto ret_stat; 4170 } else { 4171 buf_handle->mrc_linfo = NULL; 4172 buf_handle->mrc_lma = NULL; 4173 buf_handle->mrc_lmr = 0; 4174 buf_handle->mrc_rmr = 0; 4175 } 4176 #endif 4177 status = rib_reg_mem(hca, adsp, buf, buflen, 0, &mr_hdl, &mr_desc); 4178 if (status == RDMA_SUCCESS) { 4179 buf_handle->mrc_linfo = (uintptr_t)mr_hdl; 4180 buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey; 4181 buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey; 4182 } else { 4183 buf_handle->mrc_linfo = NULL; 4184 buf_handle->mrc_lmr = 0; 4185 buf_handle->mrc_rmr = 0; 4186 } 4187 ret_stat: 4188 return (status); 4189 } 4190 4191 #ifdef IB_FMR_SUP 4192 static rdma_stat 4193 rib_reg_mem_fmr(rib_hca_t *hca, caddr_t adsp, caddr_t buf, uint_t size, ibt_mr_flags_t spec, 4194 ibt_mr_hdl_t *mr_hdlp, ibt_ma_hdl_t *ma_hdlp, ibt_pmr_desc_t *pmr_descp) 4195 { 4196 ibt_va_attr_t va_attr; 4197 ibt_phys_buf_t *paddr_list; 4198 uint_t paddr_list_len, num_paddr; 4199 size_t buf_sz = 0; 4200 ibt_pmr_attr_t pmr_attr; 4201 ib_memlen_t paddr_offset; 4202 ibt_status_t ibt_status; 4203 uint_t h_page_sz; 4204 if(adsp) 4205 return(RDMA_FAILED); 4206 bzero(&va_attr, sizeof (ibt_va_attr_t)); 4207 va_attr.va_vaddr = (ib_vaddr_t)buf; 4208 va_attr.va_len = size; 4209 va_attr.va_as = (struct as *)(caddr_t)adsp; 4210 va_attr.va_flags = IBT_VA_FMR | IBT_VA_SLEEP; 4211 if (spec == IBT_MR_NONCOHERENT) 4212 va_attr.va_flags |= IBT_VA_NONCOHERENT; 4213 va_attr.va_phys_buf_min = va_attr.va_phys_buf_max = 0; 4214 4215 h_page_sz = hca->hca_attrs.hca_page_sz * 1024; 4216 paddr_list_len = (size / h_page_sz) + 2; 4217 paddr_list = (ibt_phys_buf_t *)kmem_zalloc(sizeof (ibt_phys_buf_t) * 4218 paddr_list_len, KM_NOSLEEP); 4219 4220 if (rib_debug > 0) { 4221 cmn_err(CE_NOTE, "fmr: vaddr %p, size %d paddr_list_len %d \n", 4222 buf, size, paddr_list_len); 4223 } 4224 4225 ibt_status = ibt_map_mem_area(hca->hca_hdl, &va_attr, paddr_list_len, 4226 paddr_list, &num_paddr, &buf_sz, &paddr_offset, ma_hdlp); 4227 if (ibt_status != IBT_SUCCESS) { 4228 cmn_err(CE_WARN, "rib_reg_mem_fmr: ibt_map_mem_area failed: " 4229 "status %d", ibt_status); 4230 kmem_free(paddr_list, sizeof (ibt_phys_buf_t) * paddr_list_len); 4231 return (RDMA_FAILED); 4232 } 4233 4234 if (rib_debug > 0) { 4235 cmn_err(CE_NOTE,"fmr: p_laddr %p, p_size %d, buf_sz %d, p_ofset %llX\n", 4236 paddr_list[0].p_laddr, 
paddr_list[0].p_size, buf_sz, 4237 paddr_offset); 4238 cmn_err(CE_NOTE,"fmr: ibt_map_mem_area: ret %d, num_paddr %d, spec %d\n", 4239 ibt_status, num_paddr, spec); 4240 } 4241 4242 bzero(&pmr_attr, sizeof (ibt_pmr_attr_t)); 4243 pmr_attr.pmr_iova = (ib_vaddr_t)buf; 4244 pmr_attr.pmr_len = size; 4245 pmr_attr.pmr_num_buf = num_paddr; 4246 pmr_attr.pmr_buf_sz = buf_sz; 4247 pmr_attr.pmr_buf_list = paddr_list; 4248 pmr_attr.pmr_offset = paddr_offset; 4249 pmr_attr.pmr_flags = spec; 4250 pmr_attr.pmr_ma = *ma_hdlp; 4251 4252 ibt_status = ibt_register_physical_fmr(hca->hca_hdl, hca->fmr_pool, 4253 &pmr_attr, mr_hdlp, pmr_descp); 4254 if (ibt_status != IBT_SUCCESS) { 4255 cmn_err(CE_WARN, "rib_reg_mem_fmr: ibt_register_physical_fmr " 4256 "failed: status %d", ibt_status); 4257 (void) ibt_unmap_mem_area(hca->hca_hdl, *ma_hdlp); 4258 *ma_hdlp=NULL; 4259 kmem_free(paddr_list, sizeof (ibt_phys_buf_t) * paddr_list_len); 4260 return (RDMA_FAILED); 4261 } 4262 4263 if (rib_debug > 0) { 4264 cmn_err(CE_NOTE,"fmr: rkey: 0x%lX lkey: 0x%lX, iova: %p, fmr_hdl %p \n", 4265 pmr_descp->pmd_rkey, pmr_descp->pmd_lkey, 4266 pmr_descp->pmd_iova, *mr_hdlp); 4267 } 4268 4269 kmem_free(paddr_list, sizeof (ibt_phys_buf_t) * paddr_list_len); 4270 4271 return (RDMA_SUCCESS); 4272 4273 } 4274 4275 #endif 4276 static rdma_stat 4277 rib_reg_mem(rib_hca_t *hca, caddr_t adsp, caddr_t buf, uint_t size, ibt_mr_flags_t spec, 4278 ibt_mr_hdl_t *mr_hdlp, ibt_mr_desc_t *mr_descp) 4279 { 4280 ibt_mr_attr_t mem_attr; 4281 ibt_status_t ibt_status; 4282 mem_attr.mr_vaddr = (uintptr_t)buf; 4283 mem_attr.mr_len = (ib_msglen_t)size; 4284 mem_attr.mr_as = (struct as *)(caddr_t)adsp; 4285 mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE | 4286 IBT_MR_ENABLE_REMOTE_READ | IBT_MR_ENABLE_REMOTE_WRITE | 4287 IBT_MR_ENABLE_WINDOW_BIND | spec; 4288 4289 rw_enter(&hca->state_lock, RW_READER); 4290 if (hca->state == HCA_INITED) { 4291 ibt_status = ibt_register_mr(hca->hca_hdl, hca->pd_hdl, 4292 &mem_attr, mr_hdlp, mr_descp); 4293 rw_exit(&hca->state_lock); 4294 } else { 4295 rw_exit(&hca->state_lock); 4296 return (RDMA_FAILED); 4297 } 4298 4299 if (ibt_status != IBT_SUCCESS) { 4300 cmn_err(CE_WARN, "rib_reg_mem: ibt_register_mr " 4301 "(spec:%d) failed for addr %llX, status %d", 4302 spec, (longlong_t)mem_attr.mr_vaddr, ibt_status); 4303 return (RDMA_FAILED); 4304 } 4305 return (RDMA_SUCCESS); 4306 } 4307 4308 rdma_stat 4309 rib_registermemsync(CONN *conn, caddr_t adsp, caddr_t buf, uint_t buflen, 4310 #ifdef SERVER_REG_CACHE 4311 struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle, void *lrc) 4312 #else 4313 struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle) 4314 #endif 4315 { 4316 ibt_mr_hdl_t mr_hdl = NULL; /* memory region handle */ 4317 #ifdef IB_FMR_SUP 4318 ibt_pmr_desc_t pmr_desc; /* vaddr, lkey, rkey */ 4319 ibt_ma_hdl_t ma_hdl = NULL; 4320 #endif 4321 #ifdef SERVER_REG_CACHE 4322 rib_lrc_entry_t *l; 4323 #endif 4324 ibt_mr_desc_t mr_desc; /* vaddr, lkey, rkey */ 4325 rdma_stat status; 4326 rib_hca_t *hca = (ctoqp(conn))->hca; 4327 4328 /* 4329 * Non-coherent memory registration. 
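 * When SERVER_REG_CACHE is enabled, a cache entry (lrc) that is already
 * registered is simply reused: its cached lkey/rkey and handle are copied
 * into buf_handle and returned. Otherwise the whole cached buffer is
 * registered once below and the resulting handles are stored back into
 * the entry for later reuse.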
4330 */ 4331 #ifdef SERVER_REG_CACHE 4332 l = (rib_lrc_entry_t *)lrc; 4333 if(l){ 4334 if(l->registered){ 4335 buf_handle->mrc_linfo = (uintptr_t)l->lrc_mhandle.mrc_linfo; 4336 buf_handle->mrc_lmr = (uint32_t)l->lrc_mhandle.mrc_lmr; 4337 buf_handle->mrc_rmr = (uint32_t)l->lrc_mhandle.mrc_rmr; 4338 #ifdef IB_FMR_SUP 4339 buf_handle->mrc_lma = (uintptr_t)l->lrc_mhandle.mrc_lma; 4340 #endif 4341 *sync_handle = (RIB_SYNCMEM_HANDLE)l->lrc_mhandle.mrc_linfo; 4342 return(RDMA_SUCCESS); 4343 } else { 4344 /* Always register the whole buffer */ 4345 buf = (caddr_t)l->lrc_buf; 4346 buflen = l->lrc_len; 4347 /*cmn_err(CE_NOTE,"Register %p of length %d\n",buf,buflen);*/ 4348 } 4349 } 4350 #endif 4351 #ifdef IB_FMR_SUP 4352 status = rib_reg_mem_fmr(hca, adsp, buf, buflen, IBT_MR_NONCOHERENT, &mr_hdl, 4353 &ma_hdl, &pmr_desc); 4354 if (status == RDMA_SUCCESS) { 4355 buf_handle->mrc_linfo = (uintptr_t)mr_hdl; 4356 buf_handle->mrc_lma = (uintptr_t)ma_hdl; 4357 buf_handle->mrc_lmr = (uint32_t)pmr_desc.pmd_lkey; 4358 buf_handle->mrc_rmr = (uint32_t)pmr_desc.pmd_rkey; 4359 *sync_handle = (RIB_SYNCMEM_HANDLE)mr_hdl; 4360 #ifdef SERVER_REG_CACHE 4361 if(l){ 4362 l->lrc_mhandle.mrc_linfo = (uintptr_t)mr_hdl; 4363 l->lrc_mhandle.mrc_lmr = (uint32_t)mr_desc.md_lkey; 4364 l->lrc_mhandle.mrc_rmr = (uint32_t)mr_desc.md_rkey; 4365 l->registered = TRUE; 4366 l->lrc_mhandle.mrc_lma = (uintptr_t)ma_hdl; 4367 } 4368 #endif 4369 goto ret_stat; 4370 4371 } else { 4372 if (rib_debug > 1) 4373 cmn_err(CE_WARN,"fmr reg failed for buffer %p of length %d\n",buf,buflen); 4374 buf_handle->mrc_linfo = NULL; 4375 buf_handle->mrc_lma = NULL; 4376 buf_handle->mrc_lmr = 0; 4377 buf_handle->mrc_rmr = 0; 4378 } 4379 #endif 4380 status = rib_reg_mem(hca, adsp, buf, buflen, IBT_MR_NONCOHERENT, &mr_hdl, 4381 &mr_desc); 4382 if (status == RDMA_SUCCESS) { 4383 #ifdef SERVER_REG_CACHE 4384 if(l){ 4385 l->lrc_mhandle.mrc_linfo = (uintptr_t)mr_hdl; 4386 l->lrc_mhandle.mrc_lmr = (uint32_t)mr_desc.md_lkey; 4387 l->lrc_mhandle.mrc_rmr = (uint32_t)mr_desc.md_rkey; 4388 l->registered = TRUE; 4389 } 4390 #endif 4391 buf_handle->mrc_linfo = (uintptr_t)mr_hdl; 4392 buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey; 4393 buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey; 4394 *sync_handle = (RIB_SYNCMEM_HANDLE)mr_hdl; 4395 } else { 4396 buf_handle->mrc_linfo = NULL; 4397 buf_handle->mrc_lmr = 0; 4398 buf_handle->mrc_rmr = 0; 4399 } 4400 ret_stat: 4401 return (status); 4402 } 4403 4404 /* ARGSUSED */ 4405 rdma_stat 4406 rib_deregistermem(CONN *conn, caddr_t buf, struct mrc buf_handle) 4407 { 4408 avl_index_t where = NULL; 4409 #ifdef IB_FMR_SUP 4410 ibt_status_t ibt_status; 4411 #endif 4412 rib_hca_t *hca = (ctoqp(conn))->hca; 4413 /* 4414 * Allow memory deregistration even if HCA is 4415 * getting detached. Need all outstanding 4416 * memory registrations to be deregistered 4417 * before HCA_DETACH_EVENT can be accepted. 
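 * With IB_FMR_SUP, FMR-mapped buffers are torn down in two steps: the
 * mapped memory area is released with ibt_unmap_mem_area() first and the
 * FMR itself is deregistered afterwards. Ordinary regions fall through
 * to ibt_deregister_mr().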
4418 */ 4419 #ifdef IB_FMR_SUP 4420 if(buf_handle.mrc_lma){ 4421 ibt_status = ibt_unmap_mem_area(hca->hca_hdl, 4422 (ibt_ma_hdl_t)buf_handle.mrc_lma); 4423 if (ibt_status != IBT_SUCCESS){ 4424 cmn_err(CE_WARN,"rib_deregistermem: ibt_unmap_mem_area: %d failed", 4425 ibt_status); 4426 return (RDMA_FAILED); 4427 } 4428 4429 ibt_status = ibt_deregister_fmr(hca->hca_hdl, 4430 (ibt_mr_hdl_t)(uintptr_t)buf_handle.mrc_linfo); 4431 if (ibt_status != IBT_SUCCESS) 4432 return (RDMA_FAILED); 4433 return (RDMA_SUCCESS); 4434 } 4435 #endif 4436 (void) ibt_deregister_mr(hca->hca_hdl, 4437 (ibt_mr_hdl_t)(uintptr_t)buf_handle.mrc_linfo); 4438 return (RDMA_SUCCESS); 4439 } 4440 4441 /* ARGSUSED */ 4442 rdma_stat 4443 rib_deregistermemsync(CONN *conn, caddr_t buf, struct mrc buf_handle, 4444 #ifdef SERVER_REG_CACHE 4445 RIB_SYNCMEM_HANDLE sync_handle, void *lrc) 4446 #else 4447 RIB_SYNCMEM_HANDLE sync_handle) 4448 #endif 4449 { 4450 #ifdef SERVER_REG_CACHE 4451 rib_lrc_entry_t *l; 4452 l = (rib_lrc_entry_t *)lrc; 4453 if(l) 4454 if(l->registered) 4455 return(RDMA_SUCCESS); 4456 #endif 4457 4458 4459 (void) rib_deregistermem(conn, buf, buf_handle); 4460 4461 return (RDMA_SUCCESS); 4462 } 4463 4464 /* ARGSUSED */ 4465 rdma_stat 4466 rib_syncmem(CONN *conn, RIB_SYNCMEM_HANDLE shandle, caddr_t buf, 4467 int len, int cpu) 4468 { 4469 ibt_status_t status; 4470 rib_hca_t *hca = (ctoqp(conn))->hca; 4471 ibt_mr_sync_t mr_segment; 4472 4473 mr_segment.ms_handle = (ibt_mr_hdl_t)shandle; 4474 mr_segment.ms_vaddr = (ib_vaddr_t)(uintptr_t)buf; 4475 mr_segment.ms_len = (ib_memlen_t)len; 4476 if (cpu) { 4477 /* make incoming data visible to memory */ 4478 mr_segment.ms_flags = IBT_SYNC_WRITE; 4479 } else { 4480 /* make memory changes visible to IO */ 4481 mr_segment.ms_flags = IBT_SYNC_READ; 4482 } 4483 rw_enter(&hca->state_lock, RW_READER); 4484 if (hca->state == HCA_INITED) { 4485 status = ibt_sync_mr(hca->hca_hdl, &mr_segment, 1); 4486 rw_exit(&hca->state_lock); 4487 } else { 4488 rw_exit(&hca->state_lock); 4489 return (RDMA_FAILED); 4490 } 4491 4492 if (status == IBT_SUCCESS) 4493 return (RDMA_SUCCESS); 4494 else { 4495 #ifdef DEBUG 4496 cmn_err(CE_WARN, "rib_syncmem: ibt_sync_mr failed with %d\n", 4497 status); 4498 #endif 4499 return (RDMA_FAILED); 4500 } 4501 } 4502 4503 /* 4504 * XXXX ???? 4505 */ 4506 static rdma_stat 4507 rib_getinfo(rdma_info_t *info) 4508 { 4509 /* 4510 * XXXX Hack! 4511 */ 4512 info->addrlen = 16; 4513 info->mts = 1000000; 4514 info->mtu = 1000000; 4515 4516 return (RDMA_SUCCESS); 4517 } 4518 4519 rib_bufpool_t * 4520 rib_rbufpool_create(rib_hca_t *hca, int ptype, int num) 4521 { 4522 rib_bufpool_t *rbp = NULL; 4523 bufpool_t *bp = NULL; 4524 caddr_t buf; 4525 ibt_mr_attr_t mem_attr; 4526 ibt_status_t ibt_status; 4527 int i, j; 4528 4529 rbp = (rib_bufpool_t *)kmem_zalloc(sizeof (rib_bufpool_t), KM_SLEEP); 4530 4531 bp = (bufpool_t *)kmem_zalloc(sizeof (bufpool_t) + 4532 num * sizeof (void *), KM_SLEEP); 4533 4534 mutex_init(&bp->buflock, NULL, MUTEX_DRIVER, hca->iblock); 4535 bp->numelems = num; 4536 4537 4538 switch (ptype) { 4539 case SEND_BUFFER: 4540 mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE; 4541 bp->rsize = RPC_MSG_SZ; 4542 break; 4543 case RECV_BUFFER: 4544 mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE; 4545 bp->rsize = RPC_BUF_SIZE; 4546 break; 4547 default: 4548 goto fail; 4549 } 4550 4551 /* 4552 * Register the pool. 
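 * The pool is carved out of a single kmem allocation of num * rsize
 * bytes; each rsize-sized slice is registered as its own memory region
 * so that a per-buffer lkey/rkey can later be returned by
 * rib_rbuf_alloc().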
4553 */ 4554 bp->bufsize = num * bp->rsize; 4555 bp->buf = kmem_zalloc(bp->bufsize, KM_SLEEP); 4556 rbp->mr_hdl = (ibt_mr_hdl_t *)kmem_zalloc(num * 4557 sizeof (ibt_mr_hdl_t), KM_SLEEP); 4558 rbp->mr_desc = (ibt_mr_desc_t *)kmem_zalloc(num * 4559 sizeof (ibt_mr_desc_t), KM_SLEEP); 4560 rw_enter(&hca->state_lock, RW_READER); 4561 if (hca->state != HCA_INITED) { 4562 rw_exit(&hca->state_lock); 4563 cmn_err(CE_WARN,"hca->state != HCA_INITED"); 4564 goto fail; 4565 } 4566 for (i = 0, buf = bp->buf; i < num; i++, buf += bp->rsize) { 4567 bzero(&rbp->mr_desc[i], sizeof (ibt_mr_desc_t)); 4568 mem_attr.mr_vaddr = (uintptr_t)buf; 4569 mem_attr.mr_len = (ib_msglen_t)bp->rsize; 4570 mem_attr.mr_as = NULL; 4571 ibt_status = ibt_register_mr(hca->hca_hdl, 4572 hca->pd_hdl, &mem_attr, &rbp->mr_hdl[i], 4573 &rbp->mr_desc[i]); 4574 if (ibt_status != IBT_SUCCESS) { 4575 for (j = 0; j < i; j++) { 4576 (void) ibt_deregister_mr(hca->hca_hdl, rbp->mr_hdl[j]); 4577 } 4578 rw_exit(&hca->state_lock); 4579 goto fail; 4580 } 4581 } 4582 rw_exit(&hca->state_lock); 4583 buf = (caddr_t)bp->buf; 4584 for (i = 0; i < num; i++, buf += bp->rsize) { 4585 bp->buflist[i] = (void *)buf; 4586 } 4587 bp->buffree = num - 1; /* no. of free buffers */ 4588 rbp->bpool = bp; 4589 4590 return (rbp); 4591 fail: 4592 if (bp) { 4593 if (bp->buf) 4594 kmem_free(bp->buf, bp->bufsize); 4595 kmem_free(bp, sizeof (bufpool_t) + num*sizeof (void *)); 4596 } 4597 if (rbp) { 4598 if (rbp->mr_hdl) 4599 kmem_free(rbp->mr_hdl, num*sizeof (ibt_mr_hdl_t)); 4600 if (rbp->mr_desc) 4601 kmem_free(rbp->mr_desc, num*sizeof (ibt_mr_desc_t)); 4602 kmem_free(rbp, sizeof (rib_bufpool_t)); 4603 } 4604 return (NULL); 4605 } 4606 4607 static void 4608 rib_rbufpool_deregister(rib_hca_t *hca, int ptype) 4609 { 4610 int i; 4611 rib_bufpool_t *rbp = NULL; 4612 bufpool_t *bp; 4613 4614 /* 4615 * Obtain pool address based on type of pool 4616 */ 4617 switch (ptype) { 4618 case SEND_BUFFER: 4619 rbp = hca->send_pool; 4620 break; 4621 case RECV_BUFFER: 4622 rbp = hca->recv_pool; 4623 break; 4624 default: 4625 return; 4626 } 4627 if (rbp == NULL) 4628 return; 4629 4630 bp = rbp->bpool; 4631 4632 /* 4633 * Deregister the pool memory and free it. 4634 */ 4635 for (i = 0; i < bp->numelems; i++) { 4636 (void) ibt_deregister_mr(hca->hca_hdl, rbp->mr_hdl[i]); 4637 } 4638 } 4639 4640 static void 4641 rib_rbufpool_free(rib_hca_t *hca, int ptype) 4642 { 4643 4644 rib_bufpool_t *rbp = NULL; 4645 bufpool_t *bp; 4646 4647 /* 4648 * Obtain pool address based on type of pool 4649 */ 4650 switch (ptype) { 4651 case SEND_BUFFER: 4652 rbp = hca->send_pool; 4653 break; 4654 case RECV_BUFFER: 4655 rbp = hca->recv_pool; 4656 break; 4657 default: 4658 return; 4659 } 4660 if (rbp == NULL) 4661 return; 4662 4663 bp = rbp->bpool; 4664 4665 /* 4666 * Free the pool memory. 4667 */ 4668 if (rbp->mr_hdl) 4669 kmem_free(rbp->mr_hdl, bp->numelems*sizeof (ibt_mr_hdl_t)); 4670 4671 if (rbp->mr_desc) 4672 kmem_free(rbp->mr_desc, bp->numelems*sizeof (ibt_mr_desc_t)); 4673 if (bp->buf) 4674 kmem_free(bp->buf, bp->bufsize); 4675 mutex_destroy(&bp->buflock); 4676 kmem_free(bp, sizeof (bufpool_t) + bp->numelems*sizeof (void *)); 4677 kmem_free(rbp, sizeof (rib_bufpool_t)); 4678 } 4679 4680 void 4681 rib_rbufpool_destroy(rib_hca_t *hca, int ptype) 4682 { 4683 /* 4684 * Deregister the pool memory and free it. 4685 */ 4686 rib_rbufpool_deregister(hca, ptype); 4687 rib_rbufpool_free(hca, ptype); 4688 } 4689 4690 /* 4691 * Fetch a buffer from the pool of type specified in rdbuf->type. 
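 * On success rdbuf->addr points at a pool buffer and rdbuf->len is set
 * to the fixed size of that pool (RPC_MSG_SZ for send buffers,
 * RPC_BUF_SIZE for receive buffers).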
4692 */ 4693 static rdma_stat 4694 rib_reg_buf_alloc(CONN *conn, rdma_buf_t *rdbuf) 4695 { 4696 4697 rdbuf->addr = rib_rbuf_alloc(conn, rdbuf); 4698 if (rdbuf->addr) { 4699 switch (rdbuf->type) { 4700 case SEND_BUFFER: 4701 rdbuf->len = RPC_MSG_SZ; /* 1K */ 4702 break; 4703 case RECV_BUFFER: 4704 rdbuf->len = RPC_BUF_SIZE; /* 2K */ 4705 break; 4706 default: 4707 rdbuf->len = 0; 4708 } 4709 return (RDMA_SUCCESS); 4710 } else 4711 return (RDMA_FAILED); 4712 } 4713 4714 #if defined(MEASURE_POOL_DEPTH) 4715 static void rib_recv_bufs(uint32_t x) { 4716 return; 4717 } 4718 static void rib_send_bufs(uint32_t x) { 4719 return; 4720 } 4721 #endif 4722 4723 /* 4724 * Fetch a buffer of specified type. 4725 * Note that rdbuf->handle is mw's rkey. 4726 */ 4727 static void * 4728 rib_rbuf_alloc(CONN *conn, rdma_buf_t *rdbuf) 4729 { 4730 rib_qp_t *qp = ctoqp(conn); 4731 rib_hca_t *hca = qp->hca; 4732 rdma_btype ptype = rdbuf->type; 4733 void *buf; 4734 rib_bufpool_t *rbp = NULL; 4735 bufpool_t *bp; 4736 int i; 4737 4738 /* 4739 * Obtain pool address based on type of pool 4740 */ 4741 switch (ptype) { 4742 case SEND_BUFFER: 4743 rbp = hca->send_pool; 4744 break; 4745 case RECV_BUFFER: 4746 rbp = hca->recv_pool; 4747 break; 4748 default: 4749 return (NULL); 4750 } 4751 if (rbp == NULL) 4752 return (NULL); 4753 4754 bp = rbp->bpool; 4755 4756 mutex_enter(&bp->buflock); 4757 if (bp->buffree < 0) { 4758 cmn_err(CE_WARN, "rib_rbuf_alloc: No free buffers!"); 4759 mutex_exit(&bp->buflock); 4760 return (NULL); 4761 } 4762 4763 /* XXXX put buf, rdbuf->handle.mrc_rmr, ... in one place. */ 4764 buf = bp->buflist[bp->buffree]; 4765 rdbuf->addr = buf; 4766 rdbuf->len = bp->rsize; 4767 for (i = bp->numelems - 1; i >= 0; i--) { 4768 if ((ib_vaddr_t)(uintptr_t)buf == rbp->mr_desc[i].md_vaddr) { 4769 rdbuf->handle.mrc_rmr = (uint32_t)rbp->mr_desc[i].md_rkey; 4770 rdbuf->handle.mrc_linfo = (uintptr_t)rbp->mr_hdl[i]; 4771 rdbuf->handle.mrc_lmr = (uint32_t)rbp->mr_desc[i].md_lkey; 4772 #if defined(MEASURE_POOL_DEPTH) 4773 if(ptype == SEND_BUFFER) 4774 rib_send_bufs(MAX_BUFS - (bp->buffree+1)); 4775 if(ptype == RECV_BUFFER) 4776 rib_recv_bufs(MAX_BUFS - (bp->buffree+1)); 4777 #endif 4778 bp->buffree--; 4779 if (rib_debug > 1) 4780 cmn_err(CE_NOTE, "rib_rbuf_alloc: %d free bufs " 4781 "(type %d)\n", bp->buffree+1, ptype); 4782 4783 mutex_exit(&bp->buflock); 4784 4785 return (buf); 4786 } 4787 } 4788 cmn_err(CE_WARN, "rib_rbuf_alloc: NO matching buf %p of " 4789 "type %d found!", buf, ptype); 4790 mutex_exit(&bp->buflock); 4791 4792 return (NULL); 4793 } 4794 4795 static void 4796 rib_reg_buf_free(CONN *conn, rdma_buf_t *rdbuf) 4797 { 4798 4799 rib_rbuf_free(conn, rdbuf->type, rdbuf->addr); 4800 } 4801 4802 static void 4803 rib_rbuf_free(CONN *conn, int ptype, void *buf) 4804 { 4805 rib_qp_t *qp = ctoqp(conn); 4806 rib_hca_t *hca = qp->hca; 4807 rib_bufpool_t *rbp = NULL; 4808 bufpool_t *bp; 4809 4810 /* 4811 * Obtain pool address based on type of pool 4812 */ 4813 switch (ptype) { 4814 case SEND_BUFFER: 4815 rbp = hca->send_pool; 4816 break; 4817 case RECV_BUFFER: 4818 rbp = hca->recv_pool; 4819 break; 4820 default: 4821 return; 4822 } 4823 if (rbp == NULL) 4824 return; 4825 4826 bp = rbp->bpool; 4827 4828 mutex_enter(&bp->buflock); 4829 if (++bp->buffree >= bp->numelems) { 4830 /* 4831 * Should never happen 4832 */ 4833 cmn_err(CE_WARN, "rib_rbuf_free: One (type %d) " 4834 "too many frees!", ptype); 4835 bp->buffree--; 4836 } else { 4837 bp->buflist[bp->buffree] = buf; 4838 if (rib_debug > 1) 4839 cmn_err(CE_NOTE, 
"rib_rbuf_free: %d free bufs " 4840 "(type %d)\n", bp->buffree+1, ptype); 4841 } 4842 mutex_exit(&bp->buflock); 4843 } 4844 4845 static rdma_stat 4846 rib_add_connlist(CONN *cn, rib_conn_list_t *connlist) 4847 { 4848 rw_enter(&connlist->conn_lock, RW_WRITER); 4849 if (connlist->conn_hd) { 4850 cn->c_next = connlist->conn_hd; 4851 connlist->conn_hd->c_prev = cn; 4852 } 4853 connlist->conn_hd = cn; 4854 rw_exit(&connlist->conn_lock); 4855 4856 return (RDMA_SUCCESS); 4857 } 4858 4859 static rdma_stat 4860 rib_rm_conn(CONN *cn, rib_conn_list_t *connlist) 4861 { 4862 rw_enter(&connlist->conn_lock, RW_WRITER); 4863 if (cn->c_prev) { 4864 cn->c_prev->c_next = cn->c_next; 4865 } 4866 if (cn->c_next) { 4867 cn->c_next->c_prev = cn->c_prev; 4868 } 4869 if (connlist->conn_hd == cn) 4870 connlist->conn_hd = cn->c_next; 4871 rw_exit(&connlist->conn_lock); 4872 4873 return (RDMA_SUCCESS); 4874 } 4875 4876 /* 4877 * Connection management. 4878 * IBTF does not support recycling of channels. So connections are only 4879 * in four states - C_CONN_PEND, or C_CONNECTED, or C_ERROR or 4880 * C_DISCONN_PEND state. No C_IDLE state. 4881 * C_CONN_PEND state: Connection establishment in progress to the server. 4882 * C_CONNECTED state: A connection when created is in C_CONNECTED state. 4883 * It has an RC channel associated with it. ibt_post_send/recv are allowed 4884 * only in this state. 4885 * C_ERROR state: A connection transitions to this state when WRs on the 4886 * channel are completed in error or an IBT_CM_EVENT_CONN_CLOSED event 4887 * happens on the channel or a IBT_HCA_DETACH_EVENT occurs on the HCA. 4888 * C_DISCONN_PEND state: When a connection is in C_ERROR state and when 4889 * c_ref drops to 0 (this indicates that RPC has no more references to this 4890 * connection), the connection should be destroyed. A connection transitions 4891 * into this state when it is being destroyed. 4892 */ 4893 static rdma_stat 4894 rib_conn_get(struct netbuf *svcaddr, int addr_type, void *handle, CONN **conn) 4895 { 4896 CONN *cn; 4897 int status = RDMA_SUCCESS; 4898 rib_hca_t *hca = (rib_hca_t *)handle; 4899 rib_qp_t *qp; 4900 clock_t cv_stat, timout; 4901 ibt_path_info_t path; 4902 4903 again: 4904 rw_enter(&hca->cl_conn_list.conn_lock, RW_READER); 4905 cn = hca->cl_conn_list.conn_hd; 4906 while (cn != NULL) { 4907 /* 4908 * First, clear up any connection in the ERROR state 4909 */ 4910 mutex_enter(&cn->c_lock); 4911 if (cn->c_state == C_ERROR) { 4912 if (cn->c_ref == 0) { 4913 /* 4914 * Remove connection from list and destroy it. 4915 */ 4916 cn->c_state = C_DISCONN_PEND; 4917 mutex_exit(&cn->c_lock); 4918 rw_exit(&hca->cl_conn_list.conn_lock); 4919 (void) rib_disconnect_channel(cn, 4920 &hca->cl_conn_list); 4921 goto again; 4922 } 4923 mutex_exit(&cn->c_lock); 4924 cn = cn->c_next; 4925 continue; 4926 } else if (cn->c_state == C_DISCONN_PEND) { 4927 mutex_exit(&cn->c_lock); 4928 cn = cn->c_next; 4929 continue; 4930 } 4931 if ((cn->c_raddr.len == svcaddr->len) && 4932 bcmp(svcaddr->buf, cn->c_raddr.buf, svcaddr->len) == 0) { 4933 /* 4934 * Our connection. Give up conn list lock 4935 * as we are done traversing the list. 4936 */ 4937 rw_exit(&hca->cl_conn_list.conn_lock); 4938 if (cn->c_state == C_CONNECTED) { 4939 cn->c_ref++; /* sharing a conn */ 4940 mutex_exit(&cn->c_lock); 4941 *conn = cn; 4942 return (status); 4943 } 4944 if (cn->c_state == C_CONN_PEND) { 4945 /* 4946 * Hold a reference to this conn before 4947 * we give up the lock. 
4948 */ 4949 cn->c_ref++; 4950 timout = ddi_get_lbolt() + 4951 drv_usectohz(CONN_WAIT_TIME * 1000000); 4952 while ((cv_stat = cv_timedwait_sig(&cn->c_cv, 4953 &cn->c_lock, timout)) > 0 && 4954 cn->c_state == C_CONN_PEND) 4955 ; 4956 if (cv_stat == 0) { 4957 cn->c_ref--; 4958 mutex_exit(&cn->c_lock); 4959 return (RDMA_INTR); 4960 } 4961 if (cv_stat < 0) { 4962 cn->c_ref--; 4963 mutex_exit(&cn->c_lock); 4964 return (RDMA_TIMEDOUT); 4965 } 4966 if (cn->c_state == C_CONNECTED) { 4967 *conn = cn; 4968 mutex_exit(&cn->c_lock); 4969 return (status); 4970 } else { 4971 cn->c_ref--; 4972 mutex_exit(&cn->c_lock); 4973 return (RDMA_TIMEDOUT); 4974 } 4975 } 4976 } 4977 mutex_exit(&cn->c_lock); 4978 cn = cn->c_next; 4979 } 4980 rw_exit(&hca->cl_conn_list.conn_lock); 4981 4982 status = rib_chk_srv_ats(hca, svcaddr, addr_type, &path); 4983 if (status != RDMA_SUCCESS) { 4984 #ifdef DEBUG 4985 if (rib_debug) { 4986 cmn_err(CE_WARN, "rib_conn_get: " 4987 "No server ATS record!"); 4988 } 4989 #endif 4990 return (RDMA_FAILED); 4991 } 4992 4993 /* 4994 * Channel to server doesn't exist yet, create one. 4995 */ 4996 if (rib_clnt_create_chan(hca, svcaddr, &qp) != RDMA_SUCCESS) { 4997 return (RDMA_FAILED); 4998 } 4999 cn = qptoc(qp); 5000 cn->c_state = C_CONN_PEND; 5001 cn->c_ref = 1; 5002 5003 /* 5004 * Add to conn list. 5005 * We had given up the READER lock. In the time since then, 5006 * another thread might have created the connection we are 5007 * trying here. But for now, that is quiet alright - there 5008 * might be two connections between a pair of hosts instead 5009 * of one. If we really want to close that window, 5010 * then need to check the list after acquiring the 5011 * WRITER lock. 5012 */ 5013 (void) rib_add_connlist(cn, &hca->cl_conn_list); 5014 status = rib_conn_to_srv(hca, qp, &path); 5015 mutex_enter(&cn->c_lock); 5016 if (status == RDMA_SUCCESS) { 5017 cn->c_state = C_CONNECTED; 5018 *conn = cn; 5019 } else { 5020 cn->c_state = C_ERROR; 5021 cn->c_ref--; 5022 #ifdef DEBUG 5023 if (rib_debug) { 5024 cmn_err(CE_WARN, "rib_conn_get: FAILED creating" 5025 " a channel!"); 5026 } 5027 #endif 5028 } 5029 cv_broadcast(&cn->c_cv); 5030 mutex_exit(&cn->c_lock); 5031 return (status); 5032 } 5033 5034 static rdma_stat 5035 rib_conn_release(CONN *conn) 5036 { 5037 rib_qp_t *qp = ctoqp(conn); 5038 5039 mutex_enter(&conn->c_lock); 5040 conn->c_ref--; 5041 5042 /* 5043 * If a conn is C_ERROR, close the channel. 5044 * If it's CONNECTED, keep it that way. 
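 * Only the last reference tears the channel down, and only when the
 * connection has already hit an error; healthy connections stay on
 * the list for reuse by later rib_conn_get() calls.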
5045 */ 5046 if (conn->c_ref == 0 && (conn->c_state & C_ERROR)) { 5047 conn->c_state = C_DISCONN_PEND; 5048 mutex_exit(&conn->c_lock); 5049 if (qp->mode == RIB_SERVER) 5050 (void) rib_disconnect_channel(conn, 5051 &qp->hca->srv_conn_list); 5052 else 5053 (void) rib_disconnect_channel(conn, 5054 &qp->hca->cl_conn_list); 5055 return (RDMA_SUCCESS); 5056 } 5057 mutex_exit(&conn->c_lock); 5058 return (RDMA_SUCCESS); 5059 } 5060 5061 /* 5062 * Add at front of list 5063 */ 5064 static struct rdma_done_list * 5065 rdma_done_add(rib_qp_t *qp, uint32_t xid) 5066 { 5067 struct rdma_done_list *rd; 5068 5069 ASSERT(MUTEX_HELD(&qp->rdlist_lock)); 5070 5071 rd = kmem_alloc(sizeof (*rd), KM_SLEEP); 5072 rd->xid = xid; 5073 cv_init(&rd->rdma_done_cv, NULL, CV_DEFAULT, NULL); 5074 5075 rd->prev = NULL; 5076 rd->next = qp->rdlist; 5077 if (qp->rdlist != NULL) 5078 qp->rdlist->prev = rd; 5079 qp->rdlist = rd; 5080 5081 return (rd); 5082 } 5083 5084 static void 5085 rdma_done_rm(rib_qp_t *qp, struct rdma_done_list *rd) 5086 { 5087 struct rdma_done_list *r; 5088 5089 ASSERT(MUTEX_HELD(&qp->rdlist_lock)); 5090 5091 r = rd->next; 5092 if (r != NULL) { 5093 r->prev = rd->prev; 5094 } 5095 5096 r = rd->prev; 5097 if (r != NULL) { 5098 r->next = rd->next; 5099 } else { 5100 qp->rdlist = rd->next; 5101 } 5102 5103 cv_destroy(&rd->rdma_done_cv); 5104 kmem_free(rd, sizeof (*rd)); 5105 } 5106 5107 static void 5108 rdma_done_rem_list(rib_qp_t *qp) 5109 { 5110 struct rdma_done_list *r, *n; 5111 5112 mutex_enter(&qp->rdlist_lock); 5113 for (r = qp->rdlist; r != NULL; r = n) { 5114 n = r->next; 5115 rdma_done_rm(qp, r); 5116 } 5117 mutex_exit(&qp->rdlist_lock); 5118 } 5119 5120 static void 5121 rdma_done_notify(rib_qp_t *qp, uint32_t xid) 5122 { 5123 struct rdma_done_list *r = qp->rdlist; 5124 5125 ASSERT(MUTEX_HELD(&qp->rdlist_lock)); 5126 5127 while (r) { 5128 if (r->xid == xid) { 5129 cv_signal(&r->rdma_done_cv); 5130 return; 5131 } else { 5132 r = r->next; 5133 } 5134 } 5135 if (rib_debug > 1) { 5136 cmn_err(CE_WARN, "rdma_done_notify: " 5137 "No matching xid for %u, qp %p\n", xid, (void *)qp); 5138 } 5139 } 5140 5141 rpcib_ats_t * 5142 get_ibd_entry(ib_gid_t *gid, ib_pkey_t pkey, rpcib_ibd_insts_t *ibds) 5143 { 5144 rpcib_ats_t *atsp; 5145 int i; 5146 5147 for (i = 0, atsp = ibds->rib_ats; i < ibds->rib_ibd_cnt; i++, atsp++) { 5148 if (atsp->ras_port_gid.gid_prefix == gid->gid_prefix && 5149 atsp->ras_port_gid.gid_guid == gid->gid_guid && 5150 atsp->ras_pkey == pkey) { 5151 return (atsp); 5152 } 5153 } 5154 return (NULL); 5155 } 5156 5157 int 5158 rib_get_ibd_insts_cb(dev_info_t *dip, void *arg) 5159 { 5160 rpcib_ibd_insts_t *ibds = (rpcib_ibd_insts_t *)arg; 5161 rpcib_ats_t *atsp; 5162 ib_pkey_t pkey; 5163 uint8_t port; 5164 ib_guid_t hca_guid; 5165 ib_gid_t port_gid; 5166 5167 if (i_ddi_devi_attached(dip) && 5168 (strcmp(ddi_node_name(dip), "ibport") == 0) && 5169 (strstr(ddi_get_name_addr(dip), "ipib") != NULL)) { 5170 5171 if (ibds->rib_ibd_cnt >= ibds->rib_ibd_alloc) { 5172 rpcib_ats_t *tmp; 5173 5174 tmp = (rpcib_ats_t *)kmem_zalloc((ibds->rib_ibd_alloc + 5175 N_IBD_INSTANCES) * sizeof (rpcib_ats_t), KM_SLEEP); 5176 bcopy(ibds->rib_ats, tmp, 5177 ibds->rib_ibd_alloc * sizeof (rpcib_ats_t)); 5178 kmem_free(ibds->rib_ats, 5179 ibds->rib_ibd_alloc * sizeof (rpcib_ats_t)); 5180 ibds->rib_ats = tmp; 5181 ibds->rib_ibd_alloc += N_IBD_INSTANCES; 5182 } 5183 if (((hca_guid = ddi_prop_get_int64(DDI_DEV_T_ANY, 5184 dip, 0, "hca-guid", 0)) == 0) || 5185 ((port = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 5186 0, "port-number", 
0)) == 0) || 5187 (ibt_get_port_state_byguid(hca_guid, port, 5188 &port_gid, NULL) != IBT_SUCCESS) || 5189 ((pkey = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, 5190 "port-pkey", IB_PKEY_INVALID_LIMITED)) <= 5191 IB_PKEY_INVALID_FULL)) { 5192 return (DDI_WALK_CONTINUE); 5193 } 5194 atsp = &ibds->rib_ats[ibds->rib_ibd_cnt]; 5195 atsp->ras_inst = ddi_get_instance(dip); 5196 atsp->ras_pkey = pkey; 5197 atsp->ras_port_gid = port_gid; 5198 ibds->rib_ibd_cnt++; 5199 } 5200 return (DDI_WALK_CONTINUE); 5201 } 5202 5203 void 5204 rib_get_ibd_insts(rpcib_ibd_insts_t *ibds) 5205 { 5206 ddi_walk_devs(ddi_root_node(), rib_get_ibd_insts_cb, ibds); 5207 } 5208 5209 /* 5210 * Return ibd interfaces and ibd instances. 5211 */ 5212 int 5213 get_ibd_ipaddr(rpcib_ibd_insts_t *ibds) 5214 { 5215 TIUSER *tiptr, *tiptr6; 5216 vnode_t *kvp, *kvp6; 5217 vnode_t *vp = NULL, *vp6 = NULL; 5218 struct strioctl iocb; 5219 struct lifreq lif_req; 5220 int k, ip_cnt; 5221 rpcib_ats_t *atsp; 5222 5223 if (lookupname("/dev/udp", UIO_SYSSPACE, FOLLOW, NULLVPP, 5224 &kvp) == 0) { 5225 if (t_kopen((file_t *)NULL, kvp->v_rdev, FREAD|FWRITE, 5226 &tiptr, CRED()) == 0) { 5227 vp = tiptr->fp->f_vnode; 5228 } else { 5229 VN_RELE(kvp); 5230 } 5231 } 5232 5233 if (lookupname("/dev/udp6", UIO_SYSSPACE, FOLLOW, NULLVPP, 5234 &kvp6) == 0) { 5235 if (t_kopen((file_t *)NULL, kvp6->v_rdev, FREAD|FWRITE, 5236 &tiptr6, CRED()) == 0) { 5237 vp6 = tiptr6->fp->f_vnode; 5238 } else { 5239 VN_RELE(kvp6); 5240 } 5241 } 5242 5243 if (vp == NULL && vp6 == NULL) 5244 return (-1); 5245 5246 /* Get ibd ip's */ 5247 ip_cnt = 0; 5248 for (k = 0, atsp = ibds->rib_ats; k < ibds->rib_ibd_cnt; k++, atsp++) { 5249 /* IPv4 */ 5250 if (vp != NULL) { 5251 (void) bzero((void *)&lif_req, sizeof (struct lifreq)); 5252 (void) snprintf(lif_req.lifr_name, 5253 sizeof (lif_req.lifr_name), "%s%d", 5254 IBD_NAME, atsp->ras_inst); 5255 5256 (void) bzero((void *)&iocb, sizeof (struct strioctl)); 5257 iocb.ic_cmd = SIOCGLIFADDR; 5258 iocb.ic_timout = 0; 5259 iocb.ic_len = sizeof (struct lifreq); 5260 iocb.ic_dp = (caddr_t)&lif_req; 5261 if (kstr_ioctl(vp, I_STR, (intptr_t)&iocb) == 0) { 5262 atsp->ras_inet_type = AF_INET; 5263 bcopy(&lif_req.lifr_addr, &atsp->ras_sin, 5264 sizeof (struct sockaddr_in)); 5265 ip_cnt++; 5266 continue; 5267 } 5268 } 5269 /* Try IPv6 */ 5270 if (vp6 != NULL) { 5271 (void) bzero((void *)&lif_req, sizeof (struct lifreq)); 5272 (void) snprintf(lif_req.lifr_name, 5273 sizeof (lif_req.lifr_name), "%s%d", 5274 IBD_NAME, atsp->ras_inst); 5275 5276 (void) bzero((void *)&iocb, sizeof (struct strioctl)); 5277 iocb.ic_cmd = SIOCGLIFADDR; 5278 iocb.ic_timout = 0; 5279 iocb.ic_len = sizeof (struct lifreq); 5280 iocb.ic_dp = (caddr_t)&lif_req; 5281 if (kstr_ioctl(vp6, I_STR, (intptr_t)&iocb) == 0) { 5282 5283 atsp->ras_inet_type = AF_INET6; 5284 bcopy(&lif_req.lifr_addr, &atsp->ras_sin6, 5285 sizeof (struct sockaddr_in6)); 5286 ip_cnt++; 5287 } 5288 } 5289 } 5290 5291 if (vp6 != NULL) { 5292 (void) t_kclose(tiptr6, 0); 5293 VN_RELE(kvp6); 5294 } 5295 if (vp != NULL) { 5296 (void) t_kclose(tiptr, 0); 5297 VN_RELE(kvp); 5298 } 5299 5300 if (ip_cnt == 0) 5301 return (-1); 5302 else 5303 return (0); 5304 } 5305 5306 char ** 5307 get_ip_addrs(int *count) 5308 { 5309 TIUSER *tiptr; 5310 vnode_t *kvp; 5311 int num_of_ifs; 5312 char **addresses; 5313 int return_code; 5314 5315 /* 5316 * Open a device for doing down stream kernel ioctls 5317 */ 5318 return_code = lookupname("/dev/udp", UIO_SYSSPACE, FOLLOW, 5319 NULLVPP, &kvp); 5320 if (return_code != 0) { 5321 
cmn_err(CE_NOTE, "get_Ip_addrs: lookupname failed\n"); 5322 *count = -1; 5323 return (NULL); 5324 } 5325 5326 return_code = t_kopen((file_t *)NULL, kvp->v_rdev, FREAD|FWRITE, 5327 &tiptr, CRED()); 5328 if (return_code != 0) { 5329 cmn_err(CE_NOTE, "get_Ip_addrs: t_kopen failed\n"); 5330 VN_RELE(kvp); 5331 *count = -1; 5332 return (NULL); 5333 } 5334 5335 /* 5336 * Perform the first ioctl to get the number of interfaces 5337 */ 5338 return_code = get_interfaces(tiptr, &num_of_ifs); 5339 if (return_code != 0 || num_of_ifs == 0) { 5340 cmn_err(CE_NOTE, "get_Ip_addrs: get_interfaces failed\n"); 5341 (void) t_kclose(tiptr, 0); 5342 VN_RELE(kvp); 5343 *count = -1; 5344 return (NULL); 5345 } 5346 5347 /* 5348 * Perform the second ioctl to get the address on each interface 5349 * found. 5350 */ 5351 addresses = kmem_zalloc(num_of_ifs * sizeof (char *), KM_SLEEP); 5352 return_code = find_addrs(tiptr, addresses, num_of_ifs); 5353 if (return_code <= 0) { 5354 cmn_err(CE_NOTE, "get_Ip_addrs: find_addrs failed\n"); 5355 (void) t_kclose(tiptr, 0); 5356 kmem_free(addresses, num_of_ifs * sizeof (char *)); 5357 VN_RELE(kvp); 5358 *count = -1; 5359 return (NULL); 5360 } 5361 5362 *count = return_code; 5363 VN_RELE(kvp); 5364 (void) t_kclose(tiptr, 0); 5365 return (addresses); 5366 } 5367 5368 int 5369 get_interfaces(TIUSER *tiptr, int *num) 5370 { 5371 struct lifnum if_buf; 5372 struct strioctl iocb; 5373 vnode_t *vp; 5374 int return_code; 5375 5376 /* 5377 * Prep the number of interfaces request buffer for ioctl 5378 */ 5379 (void) bzero((void *)&if_buf, sizeof (struct lifnum)); 5380 if_buf.lifn_family = AF_UNSPEC; 5381 if_buf.lifn_flags = 0; 5382 5383 /* 5384 * Prep the kernel ioctl buffer and send it down stream 5385 */ 5386 (void) bzero((void *)&iocb, sizeof (struct strioctl)); 5387 iocb.ic_cmd = SIOCGLIFNUM; 5388 iocb.ic_timout = 0; 5389 iocb.ic_len = sizeof (if_buf); 5390 iocb.ic_dp = (caddr_t)&if_buf; 5391 5392 vp = tiptr->fp->f_vnode; 5393 return_code = kstr_ioctl(vp, I_STR, (intptr_t)&iocb); 5394 if (return_code != 0) { 5395 cmn_err(CE_NOTE, "get_interfaces: kstr_ioctl failed\n"); 5396 *num = -1; 5397 return (-1); 5398 } 5399 5400 *num = if_buf.lifn_count; 5401 #ifdef DEBUG 5402 if (rib_debug > 1) 5403 cmn_err(CE_NOTE, "Number of interfaces detected: %d\n", 5404 if_buf.lifn_count); 5405 #endif 5406 return (0); 5407 } 5408 5409 int 5410 find_addrs(TIUSER *tiptr, char **addrs, int num_ifs) 5411 { 5412 struct lifconf lifc; 5413 struct lifreq *if_data_buf; 5414 struct strioctl iocb; 5415 caddr_t request_buffer; 5416 struct sockaddr_in *sin4; 5417 struct sockaddr_in6 *sin6; 5418 vnode_t *vp; 5419 int i, count, return_code; 5420 5421 /* 5422 * Prep the buffer for requesting all interface's info 5423 */ 5424 (void) bzero((void *)&lifc, sizeof (struct lifconf)); 5425 lifc.lifc_family = AF_UNSPEC; 5426 lifc.lifc_flags = 0; 5427 lifc.lifc_len = num_ifs * sizeof (struct lifreq); 5428 5429 request_buffer = kmem_zalloc(num_ifs * sizeof (struct lifreq), 5430 KM_SLEEP); 5431 5432 lifc.lifc_buf = request_buffer; 5433 5434 /* 5435 * Prep the kernel ioctl buffer and send it down stream 5436 */ 5437 (void) bzero((void *)&iocb, sizeof (struct strioctl)); 5438 iocb.ic_cmd = SIOCGLIFCONF; 5439 iocb.ic_timout = 0; 5440 iocb.ic_len = sizeof (struct lifconf); 5441 iocb.ic_dp = (caddr_t)&lifc; 5442 5443 vp = tiptr->fp->f_vnode; 5444 return_code = kstr_ioctl(vp, I_STR, (intptr_t)&iocb); 5445 if (return_code != 0) { 5446 cmn_err(CE_NOTE, "find_addrs: kstr_ioctl failed\n"); 5447 kmem_free(request_buffer, num_ifs * sizeof 
(struct lifreq)); 5448 return (-1); 5449 } 5450 5451 /* 5452 * Extract addresses and fill them in the requested array 5453 * IB_SVC_NAME_LEN is defined to be 64 so it covers both IPv4 & 5454 * IPv6. Here count is the number of IP addresses collected. 5455 */ 5456 if_data_buf = lifc.lifc_req; 5457 count = 0; 5458 for (i = lifc.lifc_len / sizeof (struct lifreq); i > 0; i--, 5459 if_data_buf++) { 5460 if (if_data_buf->lifr_addr.ss_family == AF_INET) { 5461 sin4 = (struct sockaddr_in *)&if_data_buf->lifr_addr; 5462 addrs[count] = kmem_zalloc(IB_SVC_NAME_LEN, KM_SLEEP); 5463 (void) inet_ntop(AF_INET, &sin4->sin_addr, 5464 addrs[count], IB_SVC_NAME_LEN); 5465 count ++; 5466 } 5467 5468 if (if_data_buf->lifr_addr.ss_family == AF_INET6) { 5469 sin6 = (struct sockaddr_in6 *)&if_data_buf->lifr_addr; 5470 addrs[count] = kmem_zalloc(IB_SVC_NAME_LEN, KM_SLEEP); 5471 (void) inet_ntop(AF_INET6, &sin6->sin6_addr, 5472 addrs[count], IB_SVC_NAME_LEN); 5473 count ++; 5474 } 5475 } 5476 5477 kmem_free(request_buffer, num_ifs * sizeof (struct lifreq)); 5478 return (count); 5479 } 5480 5481 /* 5482 * Goes through all connections and closes the channel 5483 * This will cause all the WRs on those channels to be 5484 * flushed. 5485 */ 5486 static void 5487 rib_close_channels(rib_conn_list_t *connlist) 5488 { 5489 CONN *conn; 5490 rib_qp_t *qp; 5491 5492 rw_enter(&connlist->conn_lock, RW_READER); 5493 conn = connlist->conn_hd; 5494 while (conn != NULL) { 5495 mutex_enter(&conn->c_lock); 5496 qp = ctoqp(conn); 5497 if (conn->c_state & C_CONNECTED) { 5498 /* 5499 * Live connection in CONNECTED state. 5500 * Call ibt_close_rc_channel in nonblocking mode 5501 * with no callbacks. 5502 */ 5503 conn->c_state = C_ERROR; 5504 (void) ibt_close_rc_channel(qp->qp_hdl, 5505 IBT_NOCALLBACKS, NULL, 0, NULL, NULL, 0); 5506 (void) ibt_free_channel(qp->qp_hdl); 5507 qp->qp_hdl = NULL; 5508 } else { 5509 if (conn->c_state == C_ERROR && 5510 qp->qp_hdl != NULL) { 5511 /* 5512 * Connection in ERROR state but 5513 * channel is not yet freed. 5514 */ 5515 (void) ibt_close_rc_channel(qp->qp_hdl, 5516 IBT_NOCALLBACKS, NULL, 0, NULL, 5517 NULL, 0); 5518 (void) ibt_free_channel(qp->qp_hdl); 5519 qp->qp_hdl = NULL; 5520 } 5521 } 5522 mutex_exit(&conn->c_lock); 5523 conn = conn->c_next; 5524 } 5525 rw_exit(&connlist->conn_lock); 5526 } 5527 5528 /* 5529 * Frees up all connections that are no longer being referenced 5530 */ 5531 static void 5532 rib_purge_connlist(rib_conn_list_t *connlist) 5533 { 5534 CONN *conn; 5535 5536 top: 5537 rw_enter(&connlist->conn_lock, RW_READER); 5538 conn = connlist->conn_hd; 5539 while (conn != NULL) { 5540 mutex_enter(&conn->c_lock); 5541 5542 /* 5543 * At this point connection is either in ERROR 5544 * or DISCONN_PEND state. If in DISCONN_PEND state 5545 * then some other thread is culling that connection. 5546 * If not and if c_ref is 0, then destroy the connection. 5547 */ 5548 if (conn->c_ref == 0 && 5549 conn->c_state != C_DISCONN_PEND) { 5550 /* 5551 * Cull the connection 5552 */ 5553 conn->c_state = C_DISCONN_PEND; 5554 mutex_exit(&conn->c_lock); 5555 rw_exit(&connlist->conn_lock); 5556 (void) rib_disconnect_channel(conn, connlist); 5557 goto top; 5558 } else { 5559 /* 5560 * conn disconnect already scheduled or will 5561 * happen from conn_release when c_ref drops to 0. 
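 * Either way this pass leaves the entry alone and moves on to the
 * next connection on the list.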
5562 */ 5563 mutex_exit(&conn->c_lock); 5564 } 5565 conn = conn->c_next; 5566 } 5567 rw_exit(&connlist->conn_lock); 5568 5569 /* 5570 * At this point, only connections with c_ref != 0 are on the list 5571 */ 5572 } 5573 5574 /* 5575 * Cleans and closes up all uses of the HCA 5576 */ 5577 static void 5578 rib_detach_hca(rib_hca_t *hca) 5579 { 5580 5581 /* 5582 * Stop all services on the HCA 5583 * Go through cl_conn_list and close all rc_channels 5584 * Go through svr_conn_list and close all rc_channels 5585 * Free connections whose c_ref has dropped to 0 5586 * Destroy all CQs 5587 * Deregister and released all buffer pool memory after all 5588 * connections are destroyed 5589 * Free the protection domain 5590 * ibt_close_hca() 5591 */ 5592 rw_enter(&hca->state_lock, RW_WRITER); 5593 if (hca->state == HCA_DETACHED) { 5594 rw_exit(&hca->state_lock); 5595 return; 5596 } 5597 5598 hca->state = HCA_DETACHED; 5599 rib_stat->nhca_inited--; 5600 5601 rib_stop_services(hca); 5602 rib_deregister_ats(); 5603 rib_close_channels(&hca->cl_conn_list); 5604 rib_close_channels(&hca->srv_conn_list); 5605 rw_exit(&hca->state_lock); 5606 5607 rib_purge_connlist(&hca->cl_conn_list); 5608 rib_purge_connlist(&hca->srv_conn_list); 5609 5610 (void) ibt_free_cq(hca->clnt_rcq->rib_cq_hdl); 5611 (void) ibt_free_cq(hca->clnt_scq->rib_cq_hdl); 5612 (void) ibt_free_cq(hca->svc_rcq->rib_cq_hdl); 5613 (void) ibt_free_cq(hca->svc_scq->rib_cq_hdl); 5614 kmem_free(hca->clnt_rcq, sizeof (rib_cq_t)); 5615 kmem_free(hca->clnt_scq, sizeof (rib_cq_t)); 5616 kmem_free(hca->svc_rcq, sizeof (rib_cq_t)); 5617 kmem_free(hca->svc_scq, sizeof (rib_cq_t)); 5618 5619 rw_enter(&hca->srv_conn_list.conn_lock, RW_READER); 5620 rw_enter(&hca->cl_conn_list.conn_lock, RW_READER); 5621 if (hca->srv_conn_list.conn_hd == NULL && 5622 hca->cl_conn_list.conn_hd == NULL) { 5623 /* 5624 * conn_lists are NULL, so destroy 5625 * buffers, close hca and be done. 5626 */ 5627 rib_rbufpool_destroy(hca, RECV_BUFFER); 5628 rib_rbufpool_destroy(hca, SEND_BUFFER); 5629 #ifdef SERVER_REG_CACHE 5630 rib_destroy_cache(hca); 5631 #endif 5632 (void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl); 5633 (void) ibt_close_hca(hca->hca_hdl); 5634 hca->hca_hdl = NULL; 5635 } 5636 rw_exit(&hca->cl_conn_list.conn_lock); 5637 rw_exit(&hca->srv_conn_list.conn_lock); 5638 5639 if (hca->hca_hdl != NULL) { 5640 mutex_enter(&hca->inuse_lock); 5641 while (hca->inuse) 5642 cv_wait(&hca->cb_cv, &hca->inuse_lock); 5643 mutex_exit(&hca->inuse_lock); 5644 /* 5645 * conn_lists are now NULL, so destroy 5646 * buffers, close hca and be done. 
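 * This path is reached only once every callback still using the HCA
 * has finished (hca->inuse has dropped and cb_cv was signalled), so
 * the buffer pools and protection domain can be released safely.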
5647 */ 5648 rib_rbufpool_destroy(hca, RECV_BUFFER); 5649 rib_rbufpool_destroy(hca, SEND_BUFFER); 5650 (void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl); 5651 (void) ibt_close_hca(hca->hca_hdl); 5652 hca->hca_hdl = NULL; 5653 } 5654 } 5655 5656 #ifdef SERVER_REG_CACHE 5657 5658 static void 5659 rib_server_side_cache_reclaim(void *argp) 5660 { 5661 cache_avl_struct_t *rcas; 5662 rib_lrc_entry_t *rb; 5663 rib_hca_t *hca = (rib_hca_t *)argp; 5664 5665 rw_enter(&hca->avl_rw_lock,RW_WRITER); 5666 rcas = avl_first(&hca->avl_tree); 5667 if(rcas != NULL) 5668 avl_remove(&hca->avl_tree, rcas); 5669 while(rcas != NULL){ 5670 while(rcas->r.forw != &rcas->r){ 5671 rcas->elements--; 5672 rb = rcas->r.forw; 5673 remque(rb); 5674 rib_deregistermem_via_hca(hca, rb->lrc_buf, rb->lrc_mhandle); 5675 kmem_free(rb->lrc_buf, rb->lrc_len); 5676 kmem_free(rb, sizeof(rib_lrc_entry_t)); 5677 } 5678 mutex_destroy(&rcas->node_lock); 5679 kmem_cache_free(hca->server_side_cache,rcas); 5680 rcas = avl_first(&hca->avl_tree); 5681 if(rcas != NULL) 5682 avl_remove(&hca->avl_tree, rcas); 5683 } 5684 rw_exit(&hca->avl_rw_lock); 5685 } 5686 5687 static int avl_compare(const void *t1,const void *t2) { 5688 5689 if(rib_debug > 1) 5690 cmn_err(CE_NOTE,"Comparing %d and %d\n",((cache_avl_struct_t *)t1)->len, ((cache_avl_struct_t *)t2)->len); 5691 if(((cache_avl_struct_t *)t1)->len == ((cache_avl_struct_t *)t2)->len) 5692 return 0; 5693 5694 if(((cache_avl_struct_t *)t1)->len < ((cache_avl_struct_t *)t2)->len) 5695 return -1; 5696 5697 if(((cache_avl_struct_t *)t1)->len > ((cache_avl_struct_t *)t2)->len) 5698 return 1; 5699 } 5700 5701 static void rib_destroy_cache(rib_hca_t *hca) { 5702 cache_avl_struct_t *rcas, *root; 5703 rib_lrc_entry_t *rb; 5704 5705 hca->avl_init = FALSE; 5706 kmem_cache_destroy(hca->server_side_cache); 5707 avl_destroy(&hca->avl_tree); 5708 rw_destroy(&hca->avl_rw_lock); 5709 5710 } 5711 5712 static rib_lrc_entry_t * 5713 rib_get_server_cache_buf(CONN *conn,uint32_t len) 5714 { 5715 cache_avl_struct_t cas,*rcas; 5716 rib_hca_t *hca = (ctoqp(conn))->hca; 5717 rib_lrc_entry_t *reply_buf; 5718 avl_index_t where = NULL; 5719 struct rib_lrc_entry *forw = NULL; 5720 if(!hca->avl_init) 5721 goto error_alloc; 5722 cas.len = len; 5723 rw_enter(&hca->avl_rw_lock, RW_READER); 5724 if((rcas = (cache_avl_struct_t *)avl_find(&hca->avl_tree, &cas, &where)) == NULL){ 5725 rw_exit(&hca->avl_rw_lock); 5726 rw_enter(&hca->avl_rw_lock, RW_WRITER); 5727 /* Recheck to make sure no other thread added the entry in */ 5728 if((rcas = (cache_avl_struct_t *)avl_find(&hca->avl_tree, &cas, &where)) == NULL){ 5729 /* Allocate an avl tree entry */ 5730 if(rib_debug > 1) 5731 cmn_err(CE_NOTE,"Allocating an avl entry for length %d\n",len); 5732 rcas = (cache_avl_struct_t *)kmem_cache_alloc(hca->server_side_cache,KM_SLEEP); 5733 bzero(rcas, sizeof(cache_avl_struct_t)); 5734 rcas->elements = 0; 5735 rcas->r.forw = 5736 &rcas->r; 5737 rcas->r.back = 5738 &rcas->r; 5739 rcas->len = len; 5740 mutex_init(&rcas->node_lock, NULL, MUTEX_DEFAULT, NULL); 5741 avl_insert(&hca->avl_tree,rcas,where); 5742 } 5743 } 5744 if(rcas->elements > 0){ 5745 mutex_enter(&rcas->node_lock); 5746 reply_buf = rcas->r.forw; 5747 remque(reply_buf); 5748 rcas->elements --; 5749 mutex_exit(&rcas->node_lock); 5750 rw_exit(&hca->avl_rw_lock); 5751 if(rib_debug > 1) 5752 cmn_err(CE_NOTE,"Allocating a pre-alloced buffer for length %d\n",len); 5753 } else { 5754 rw_exit(&hca->avl_rw_lock); 5755 rib_total_buffers ++; 5756 if(rib_debug > 1) 5757 cmn_err(CE_NOTE,"Allocating a new 
buffer for length %d\n", len);
		/* Allocate a reply_buf entry */
		reply_buf = (rib_lrc_entry_t *)kmem_alloc(
		    sizeof (rib_lrc_entry_t), KM_SLEEP);
		bzero(reply_buf, sizeof (rib_lrc_entry_t));
		reply_buf->lrc_buf = kmem_alloc(len, KM_SLEEP);
		reply_buf->lrc_len = len;
		reply_buf->registered = FALSE;
		reply_buf->avl_node = (void *)rcas;
	}

	return (reply_buf);
error_alloc:
	reply_buf = (rib_lrc_entry_t *)kmem_alloc(sizeof (rib_lrc_entry_t),
	    KM_SLEEP);
	bzero(reply_buf, sizeof (rib_lrc_entry_t));
	reply_buf->lrc_buf = kmem_alloc(len, KM_SLEEP);
	reply_buf->lrc_len = len;
	reply_buf->registered = FALSE;
	reply_buf->avl_node = NULL;
	return (reply_buf);
}

/*
 * Return a pre-registered buffer back to the cache (without
 * unregistering the buffer).
 */
static void
rib_free_server_cache_buf(CONN *conn, rib_lrc_entry_t *reg_buf)
{
	cache_avl_struct_t	cas, *rcas;
	avl_index_t		where = NULL;
	rib_hca_t		*hca = (ctoqp(conn))->hca;

	if (!reg_buf) {
		cmn_err(CE_WARN, "Got a null reg_buf\n");
		return;
	}
	if (!hca->avl_init)
		goto error_free;
	cas.len = reg_buf->lrc_len;
	rw_enter(&hca->avl_rw_lock, RW_READER);
	if ((rcas = (cache_avl_struct_t *)avl_find(&hca->avl_tree, &cas,
	    &where)) == NULL) {
		rw_exit(&hca->avl_rw_lock);
		goto error_free;
	} else {
		mutex_enter(&rcas->node_lock);
		insque(reg_buf, &rcas->r);
		rcas->elements++;
		mutex_exit(&rcas->node_lock);
		rw_exit(&hca->avl_rw_lock);
		if (rib_debug > 1)
			cmn_err(CE_NOTE, "Returning buffer for length %d\n",
			    reg_buf->lrc_len);
	}
	return;
error_free:
	rib_deregistermem_via_hca(hca, reg_buf->lrc_buf, reg_buf->lrc_mhandle);
	kmem_free(reg_buf->lrc_buf, reg_buf->lrc_len);
	kmem_free(reg_buf, sizeof (rib_lrc_entry_t));
}

#endif

static rdma_stat
rib_registermem_via_hca(rib_hca_t *hca, caddr_t adsp, caddr_t buf,
	uint_t buflen, struct mrc *buf_handle)
{
	ibt_mr_hdl_t	mr_hdl = NULL;	/* memory region handle */
#ifdef IB_FMR_SUP
	ibt_pmr_desc_t	pmr_desc;	/* vaddr, lkey, rkey */
	ibt_ma_hdl_t	ma_hdl = NULL;
#endif
	ibt_mr_desc_t	mr_desc;	/* vaddr, lkey, rkey */
	rdma_stat	status;

	/*
	 * Note: ALL buffer pools use the same memory type RDMARW.
	 */
	/*
	 * This code will not be activated on the server. We could remove
	 * the call to rib_reg_mem_fmr, but leave it in, in case the FMR
	 * bugs get fixed. The bigger question is whether we need FMR when
	 * the registered buffers are coming out of a slab cache. This needs
	 * to be evaluated.
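	 * Note also that the rib_reg_mem_fmr() call below appears to pass
	 * buf and adsp in the opposite order from the function's
	 * (hca, adsp, buf, ...) signature; since this path is compiled out
	 * it has not been exercised, but it should be corrected before
	 * IB_FMR_SUP is ever enabled here.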
5839 */ 5840 #ifdef IB_FMR_SUP 5841 status = rib_reg_mem_fmr(hca, buf, adsp, buflen, 0, &mr_hdl, &ma_hdl, 5842 &pmr_desc); 5843 if (status == RDMA_SUCCESS) { 5844 buf_handle->mrc_linfo = (uintptr_t)mr_hdl; 5845 buf_handle->mrc_lmr = (uint32_t)pmr_desc.pmd_lkey; 5846 buf_handle->mrc_rmr = (uint32_t)pmr_desc.pmd_rkey; 5847 buf_handle->mrc_lma = (uintptr_t)ma_hdl; 5848 goto ret_stat; 5849 } else { 5850 buf_handle->mrc_linfo = NULL; 5851 buf_handle->mrc_lma = NULL; 5852 buf_handle->mrc_lmr = 0; 5853 buf_handle->mrc_rmr = 0; 5854 } 5855 #endif 5856 status = rib_reg_mem(hca, adsp, buf, buflen, 0, &mr_hdl, &mr_desc); 5857 if (status == RDMA_SUCCESS) { 5858 buf_handle->mrc_linfo = (uint64_t)mr_hdl; 5859 buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey; 5860 buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey; 5861 } else { 5862 buf_handle->mrc_linfo = NULL; 5863 buf_handle->mrc_lmr = 0; 5864 buf_handle->mrc_rmr = 0; 5865 } 5866 ret_stat: 5867 return (status); 5868 } 5869 5870 /* ARGSUSED */ 5871 static rdma_stat 5872 rib_deregistermemsync_via_hca(rib_hca_t *hca, caddr_t buf, 5873 struct mrc buf_handle, RIB_SYNCMEM_HANDLE sync_handle) 5874 { 5875 5876 (void) rib_deregistermem_via_hca(hca, buf, buf_handle); 5877 5878 return (RDMA_SUCCESS); 5879 } 5880 5881 /* ARGSUSED */ 5882 static rdma_stat 5883 rib_deregistermem_via_hca(rib_hca_t *hca, caddr_t buf, struct mrc buf_handle) 5884 { 5885 #ifdef IB_FMR_SUP 5886 ibt_status_t ibt_status; 5887 if(buf_handle.mrc_lma){ 5888 ibt_status = ibt_unmap_mem_area(hca->hca_hdl, 5889 (ibt_ma_hdl_t)buf_handle.mrc_lma); 5890 if (ibt_status != IBT_SUCCESS){ 5891 cmn_err(CE_WARN,"rib_deregistermem: ibt_unmap_mem_area: %d failed", 5892 ibt_status); 5893 return (RDMA_FAILED); 5894 } 5895 ibt_status = ibt_deregister_fmr(hca->hca_hdl, 5896 (ibt_mr_hdl_t)(uintptr_t)buf_handle.mrc_linfo); 5897 if (ibt_status != IBT_SUCCESS){ 5898 cmn_err(CE_WARN,"rib_deregistermem: ibt_unmap_mem_area: %d failed", 5899 ibt_status); 5900 return (RDMA_FAILED); 5901 } 5902 return (RDMA_SUCCESS); 5903 } 5904 #endif 5905 5906 (void) ibt_deregister_mr(hca->hca_hdl, 5907 (ibt_mr_hdl_t)buf_handle.mrc_linfo); 5908 return (RDMA_SUCCESS); 5909 } 5910 5911 #if defined(ASYNC_SERVER_DEREG)||defined(ASYNC_CLIENT_DEREG) 5912 static int 5913 clist_deregister1(CONN *conn, struct clist *cl, bool_t src) 5914 { 5915 struct clist *c; 5916 5917 for (c = cl; c; c = c->c_next) { 5918 if (src) { 5919 if (c->c_smemhandle.mrc_rmr != 0) { 5920 (void) RDMA_DEREGMEMSYNC(conn, 5921 (caddr_t)(uintptr_t)c->c_saddr, 5922 c->c_smemhandle, 5923 #ifdef SERVER_REG_CACHE 5924 (void *)(uintptr_t)c->c_ssynchandle, (void *)c->long_reply_buf); 5925 #else 5926 (void *)(uintptr_t)c->c_ssynchandle); 5927 #endif 5928 c->c_smemhandle.mrc_rmr = 0; 5929 c->c_ssynchandle = NULL; 5930 } 5931 } else { 5932 if (c->c_dmemhandle.mrc_rmr != 0) { 5933 (void) RDMA_DEREGMEMSYNC(conn, 5934 (caddr_t)(uintptr_t)c->c_daddr, 5935 c->c_dmemhandle, 5936 #ifdef SERVER_REG_CACHE 5937 (void *)(uintptr_t)c->c_dsynchandle, (void *)c->long_reply_buf); 5938 #else 5939 (void *)(uintptr_t)c->c_dsynchandle); 5940 #endif 5941 c->c_dmemhandle.mrc_rmr = 0; 5942 c->c_dsynchandle = NULL; 5943 } 5944 } 5945 } 5946 5947 return (RDMA_SUCCESS); 5948 } 5949 #endif 5950 5951 5952 5953 #if defined(ASYNC_CLIENT_DEREG) 5954 static void 5955 async_dereg_thread(caddr_t arg){ 5956 ASYNC *r; 5957 cmn_err(CE_WARN,"async_dereg_thread initiated\n"); 5958 fetch_another_entry: 5959 mutex_enter(&at_mutex); 5960 while ((rqueue.forw == rqueue.back) && (rqueue.forw == &rqueue)) 5961 cv_wait(&at_cond, 
&at_mutex);
	r = rqueue.forw;
	remque(rqueue.forw);
	mutex_exit(&at_mutex);
	/* Process deregistration */
	clist_deregister1(&r->c_conn, &r->c_clist, FALSE);
	kmem_free(r, sizeof (ASYNC));
	goto fetch_another_entry;
}

void
insert_queue(CONN *conn, struct clist *rwc)
{
	ASYNC	*r;

	r = kmem_zalloc(sizeof (ASYNC), KM_SLEEP);
	r->c_clist = *rwc;
	r->c_conn = *conn;
	mutex_enter(&at_mutex);
	insque(r, &rqueue);
	cv_broadcast(&at_cond);
	mutex_exit(&at_mutex);
}
#endif
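/*
 * A minimal usage sketch (editorial illustration only, not part of the
 * driver). RPC/RDMA consumers normally reach these entry points through
 * the RDMATF plugin operations vector rather than by calling them
 * directly; srv_addr and hca below are hypothetical.
 *
 *	CONN		*conn;
 *	rdma_buf_t	sbuf;
 *
 *	if (rib_conn_get(&srv_addr, AF_INET, hca, &conn) == RDMA_SUCCESS) {
 *		sbuf.type = SEND_BUFFER;
 *		if (rib_reg_buf_alloc(conn, &sbuf) == RDMA_SUCCESS) {
 *			(sbuf.addr, sbuf.len and sbuf.handle now describe a
 *			pre-registered send buffer; post the work request,
 *			then return the buffer to the pool.)
 *			rib_reg_buf_free(conn, &sbuf);
 *		}
 *		(void) rib_conn_release(conn);
 *	}
 */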