1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 2006, The Ohio State University. All rights reserved. 28 * 29 * Portions of this source code is developed by the team members of 30 * The Ohio State University's Network-Based Computing Laboratory (NBCL), 31 * headed by Professor Dhabaleswar K. (DK) Panda. 32 * 33 * Acknowledgements to contributions from developors: 34 * Ranjit Noronha: noronha@cse.ohio-state.edu 35 * Lei Chai : chail@cse.ohio-state.edu 36 * Weikuan Yu : yuw@cse.ohio-state.edu 37 * 38 */ 39 #ifndef _IB_H 40 #define _IB_H 41 42 #pragma ident "@(#)ib.h 1.8 05/06/08 SMI" 43 44 /* 45 * ib.h, rpcib plugin interface. 46 */ 47 48 #include <sys/types.h> 49 #include <sys/ddi.h> 50 #include <sys/sunddi.h> 51 #include <sys/conf.h> 52 #include <sys/stat.h> 53 #include <rpc/rpc.h> 54 #include <rpc/rpc_rdma.h> 55 #include <sys/ib/ibtl/ibti.h> 56 #ifdef SERVER_REG_CACHE 57 #include <sys/avl.h> 58 #endif 59 60 #ifdef __cplusplus 61 extern "C" { 62 #endif 63 64 #define MAX_BUFS 256 /* max no. of buffers per pool */ 65 #define DEF_CQ_SIZE 4096 - 1 /* default CQ size */ 66 /* 67 * Tavor returns the next higher power of 2 68 * CQ entries than the requested size. 69 * For instance, if you request (2^12 - 1) 70 * CQ entries, Tavor returns 2^12 entries. 71 * 4K CQ entries suffice. Hence, 4096 - 1. 72 */ 73 #define DEF_SQ_SIZE 128 /* default SendQ size */ 74 #define DEF_RQ_SIZE 256 /* default RecvQ size */ 75 #define DSEG_MAX 2 76 #define RQ_DSEG_MAX 1 /* default RQ data seg */ 77 #define IBSRM_HB 0x8000 /* high order bit of pkey */ 78 #define NFS_SEC_KEY0 0x6878 /* randomly selected NFS security key */ 79 #define NFS_SEC_KEY1 0x8679 80 81 /* max no. of refresh attempts on IBT_CM_CONN_STALE error */ 82 #define REFRESH_ATTEMPTS 3 83 84 typedef struct rib_hca_s rib_hca_t; 85 typedef struct rib_qp_s rib_qp_t; 86 typedef struct rib_cq_s rib_cq_t; 87 88 /* 89 * Notification for RDMA_DONE is based on xid 90 */ 91 struct rdma_done_list { 92 uint32_t xid; /* XID waiting for RDMA_DONE */ 93 kcondvar_t rdma_done_cv; /* cv for RDMA_DONE */ 94 struct rdma_done_list *next; 95 struct rdma_done_list *prev; 96 }; 97 98 /* 99 * State of the plugin. 100 * ACCEPT = accepting new connections and requests 101 * NO_ACCEPT = not accepting new connection and requests 102 */ 103 #define ACCEPT 1 104 #define NO_ACCEPT 2 105 106 /* 107 * Send Wait states 108 */ 109 #define SEND_WAIT -1 110 111 /* 112 * Reply states 113 */ 114 #define REPLY_WAIT -1 115 116 typedef void * rib_pvoid; 117 typedef rib_pvoid RIB_SYNCMEM_HANDLE; 118 119 /* 120 * IB buffer pool management structure 121 */ 122 123 /* 124 * Buffer pool info 125 */ 126 typedef struct { 127 kmutex_t buflock; /* lock for this structure */ 128 caddr_t buf; /* pool address */ 129 uint32_t bufhandle; /* rkey for this pool */ 130 ulong_t bufsize; /* size of pool */ 131 int rsize; /* size of each element */ 132 int numelems; /* no. of elements allocated */ 133 int buffree; /* no. of free elements */ 134 void *buflist[1]; /* free elements in pool */ 135 } bufpool_t; 136 137 typedef struct { 138 bufpool_t *bpool; 139 ibt_mr_hdl_t *mr_hdl; 140 #ifdef IB_FMR_SUP 141 ibt_ma_hdl_t *ma_hdl; 142 ibt_pmr_desc_t *pmr_desc; 143 #endif 144 ibt_mr_desc_t *mr_desc; /* vaddr, lkey, rkey */ 145 146 } rib_bufpool_t; 147 148 /* 149 * ATS relsted defines and structures. 150 */ 151 #define ATS_AR_DATA_LEN 16 152 #define IBD_NAME "ibd" 153 #define N_IBD_INSTANCES 4 154 155 typedef struct rpcib_ats_s { 156 int ras_inst; 157 ib_pkey_t ras_pkey; 158 ib_gid_t ras_port_gid; 159 sa_family_t ras_inet_type; 160 union { 161 struct sockaddr_in ras_sockaddr; 162 struct sockaddr_in6 ras_sockaddr6; 163 } ra_sin; 164 #define ras_sin ra_sin.ras_sockaddr 165 #define ras_sin6 ra_sin.ras_sockaddr6 166 } rpcib_ats_t; 167 168 typedef struct rpcib_ibd_insts_s { 169 int rib_ibd_alloc; 170 int rib_ibd_cnt; 171 rpcib_ats_t *rib_ats; 172 } rpcib_ibd_insts_t; 173 174 /* 175 * Service types supported by RPCIB 176 * For now only NFS is supported. 177 */ 178 #define NFS 1 179 #define NLM 2 180 181 /* 182 * Tracks consumer state (client or server). 183 */ 184 typedef enum { 185 RIB_SERVER, 186 RIB_CLIENT 187 } rib_mode_t; 188 189 /* 190 * CQ structure 191 */ 192 struct rib_cq_s { 193 rib_hca_t *rib_hca; 194 ibt_cq_hdl_t rib_cq_hdl; 195 }; 196 197 /* 198 * RPCIB plugin state 199 */ 200 typedef struct rpcib_state { 201 ibt_clnt_hdl_t ibt_clnt_hdl; 202 uint32_t hca_count; 203 uint32_t nhca_inited; 204 ib_guid_t *hca_guids; 205 rib_hca_t *hcas; 206 int refcount; 207 kmutex_t open_hca_lock; 208 rib_hca_t *hca; /* the hca being used */ 209 queue_t *q; /* up queue for a serv_type */ 210 uint32_t service_type; /* NFS, NLM, etc */ 211 void *private; 212 } rpcib_state_t; 213 214 /* 215 * Each registered service's data structure. 216 * Each HCA has a list of these structures, which are the registered 217 * services on this HCA. 218 */ 219 typedef struct rib_service rib_service_t; 220 struct rib_service { 221 uint32_t srv_type; /* i.e, NFS, NLM, v4CBD */ 222 223 /* 224 * service name, i.e, <IP>::NFS or <IP>::NLM. Since 225 * each type of service can be registered with many 226 * IP addrs(srv_name) and is running on all ports 227 * for all HCAs. 228 */ 229 char *srv_name; 230 231 uint32_t srv_port; /* port on which registered */ 232 ib_svc_id_t srv_id; /* from ibt_register call */ 233 ibt_srv_hdl_t srv_hdl; /* from ibt_register call */ 234 ibt_sbind_hdl_t *srv_sbind_hdl; /* from ibt_bind call */ 235 ibt_ar_t srv_ar; 236 237 /* 238 * pointer to the next service registered on this 239 * particular HCA 240 */ 241 rib_service_t *srv_next; 242 }; 243 244 /* 245 * Connection lists 246 */ 247 typedef struct { 248 krwlock_t conn_lock; /* list lock */ 249 CONN *conn_hd; /* list head */ 250 } rib_conn_list_t; 251 252 enum hca_state { 253 HCA_INITED, /* hca in up and running state */ 254 HCA_DETACHED /* hca in detached state */ 255 }; 256 257 /* 258 * RPCIB per HCA structure 259 */ 260 struct rib_hca_s { 261 ibt_clnt_hdl_t ibt_clnt_hdl; 262 263 /* 264 * per HCA. 265 */ 266 ibt_hca_hdl_t hca_hdl; /* HCA handle */ 267 ibt_hca_attr_t hca_attrs; /* HCA attributes */ 268 ibt_pd_hdl_t pd_hdl; 269 ib_guid_t hca_guid; 270 uint32_t hca_nports; 271 ibt_hca_portinfo_t *hca_ports; 272 size_t hca_pinfosz; 273 enum hca_state state; /* state of HCA */ 274 krwlock_t state_lock; /* protects state field */ 275 bool_t inuse; /* indicates HCA usage */ 276 kmutex_t inuse_lock; /* protects inuse field */ 277 /* 278 * List of services registered on all ports available 279 * on this HCA. Only one consumer of KRPC can register 280 * its services at one time or tear them down at one 281 * time. 282 */ 283 rib_service_t *service_list; 284 krwlock_t service_list_lock; 285 286 rib_service_t *ats_list; /* Service list for ATS */ 287 288 rib_conn_list_t cl_conn_list; /* client conn list */ 289 rib_conn_list_t srv_conn_list; /* server conn list */ 290 291 rib_cq_t *clnt_scq; 292 rib_cq_t *clnt_rcq; 293 rib_cq_t *svc_scq; 294 rib_cq_t *svc_rcq; 295 kmutex_t cb_lock; 296 kcondvar_t cb_cv; 297 298 rib_bufpool_t *recv_pool; /* recv buf pool */ 299 rib_bufpool_t *send_pool; /* send buf pool */ 300 301 void *iblock; /* interrupt cookie */ 302 303 #ifdef IB_FMR_SUP 304 ibt_fmr_pool_hdl_t fmr_pool; 305 #endif /* IB_FMR_SUP */ 306 307 #ifdef SERVER_REG_CACHE 308 kmem_cache_t *server_side_cache; /* long reply pool */ 309 avl_tree_t avl_tree; 310 kmutex_t avl_lock; 311 krwlock_t avl_rw_lock; 312 volatile bool_t avl_init; 313 #endif 314 315 }; 316 317 318 /* 319 * Structure on wait state of a post send 320 */ 321 struct send_wid { 322 #if defined (CLNT_INTERRUPT_COAL) 323 struct send_wid *forw; 324 struct send_wid *back; 325 #endif 326 uint32_t xid; 327 int cv_sig; 328 kmutex_t sendwait_lock; 329 kcondvar_t wait_cv; 330 uint_t status; 331 rib_qp_t *qp; 332 int nsbufs; /* # of send buffers posted */ 333 uint64_t sbufaddr[DSEG_MAX]; /* posted send buffers */ 334 caddr_t c; 335 caddr_t c1; 336 int l1; 337 caddr_t c2; 338 int l2; 339 int wl,rl; 340 }; 341 342 /* 343 * Structure on reply descriptor for recv queue. 344 * Different from the above posting of a descriptor. 345 */ 346 struct reply { 347 uint32_t xid; 348 uint_t status; 349 uint64_t vaddr_cq; /* buf addr from CQ */ 350 uint_t bytes_xfer; 351 kcondvar_t wait_cv; 352 struct reply *next; 353 struct reply *prev; 354 }; 355 356 struct svc_recv { 357 rib_qp_t *qp; 358 uint64_t vaddr; 359 uint_t bytes_xfer; 360 }; 361 362 struct recv_wid { 363 uint32_t xid; 364 rib_qp_t *qp; 365 uint64_t addr; /* posted buf addr */ 366 }; 367 368 /* 369 * Per QP data structure 370 */ 371 struct rib_qp_s { 372 rib_hca_t *hca; 373 rib_mode_t mode; /* RIB_SERVER or RIB_CLIENT */ 374 CONN rdmaconn; 375 ibt_channel_hdl_t qp_hdl; 376 uint_t port_num; 377 ib_qpn_t qpn; 378 int chan_flags; 379 clock_t timeout; 380 ibt_rc_chan_query_attr_t qp_q_attrs; 381 rib_cq_t *send_cq; /* send CQ */ 382 rib_cq_t *recv_cq; /* recv CQ */ 383 384 /* 385 * Number of pre-posted rbufs 386 */ 387 uint_t n_posted_rbufs; 388 kcondvar_t posted_rbufs_cv; 389 kmutex_t posted_rbufs_lock; 390 391 /* 392 * RPC reply 393 */ 394 uint_t rep_list_size; 395 struct reply *replylist; 396 kmutex_t replylist_lock; 397 398 /* 399 * server only, RDMA_DONE 400 */ 401 struct rdma_done_list *rdlist; 402 kmutex_t rdlist_lock; 403 404 kmutex_t cb_lock; 405 kcondvar_t cb_conn_cv; 406 407 caddr_t q; /* upstream queue */ 408 struct send_wid wd; 409 }; 410 411 #define ctoqp(conn) ((rib_qp_t *)((conn)->c_private)) 412 #define qptoc(rqp) ((CONN *)&((rqp)->rdmaconn)) 413 414 /* 415 * Timeout for various calls 416 */ 417 #define CONN_WAIT_TIME 40 418 #define SEND_WAIT_TIME 40 /* time for send completion */ 419 420 #define REPLY_WAIT_TIME 40 /* time to get reply from remote QP */ 421 422 #ifdef __cplusplus 423 } 424 #endif 425 426 #endif /* !_IB_H */--- EOF ---