New ib.h
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
/*
 * Copyright (c) 2006, The Ohio State University. All rights reserved.
 *
 * Portions of this source code were developed by the team members of
 * The Ohio State University's Network-Based Computing Laboratory (NBCL),
 * headed by Professor Dhabaleswar K. (DK) Panda.
 *
 * Acknowledgements to contributions from developers:
 *   Ranjit Noronha: noronha@cse.ohio-state.edu
 *   Lei Chai      : chail@cse.ohio-state.edu
 *   Weikuan Yu    : yuw@cse.ohio-state.edu
 *
 */
39 #ifndef _IB_H
40 #define _IB_H
41
42 #pragma ident "@(#)ib.h 1.8 05/06/08 SMI"
43
44 /*
45 * ib.h, rpcib plugin interface.
46 */
47
48 #include <sys/types.h>
49 #include <sys/ddi.h>
50 #include <sys/sunddi.h>
51 #include <sys/conf.h>
52 #include <sys/stat.h>
53 #include <rpc/rpc.h>
54 #include <rpc/rpc_rdma.h>
55 #include <sys/ib/ibtl/ibti.h>
56 #ifdef SERVER_REG_CACHE
57 #include <sys/avl.h>
58 #endif
59
60 #ifdef __cplusplus
61 extern "C" {
62 #endif
63
/*
 * Sizing limits and defaults for buffer pools, completion queues (CQ)
 * and work queues.
 */
#define	MAX_BUFS	256	/* max no. of buffers per pool */

/*
 * Default CQ size.
 *
 * Tavor returns the next higher power of 2 CQ entries than the
 * requested size.  For instance, if you request (2^12 - 1) CQ
 * entries, Tavor returns 2^12 entries.  4K CQ entries suffice,
 * hence 4096 - 1.
 *
 * The replacement list is parenthesized so the macro acts as a single
 * value inside a larger expression; without the parentheses
 * "2 * DEF_CQ_SIZE" would expand to "2 * 4096 - 1".
 */
#define	DEF_CQ_SIZE	(4096 - 1)

#define	DEF_SQ_SIZE	128	/* default SendQ size */
#define	DEF_RQ_SIZE	256	/* default RecvQ size */
#define	DSEG_MAX	2	/* sizes sbufaddr[] in struct send_wid */
#define	RQ_DSEG_MAX	1	/* default RQ data seg */
#define	IBSRM_HB	0x8000	/* high order bit of pkey */
#define	NFS_SEC_KEY0	0x6878	/* randomly selected NFS security key */
#define	NFS_SEC_KEY1	0x8679

/* max no. of refresh attempts on IBT_CM_CONN_STALE error */
#define	REFRESH_ATTEMPTS	3
83
/*
 * Forward declarations of the per-HCA, per-QP and CQ structures so
 * they can refer to one another through pointers before their full
 * definitions appear.
 */
typedef struct rib_cq_s		rib_cq_t;
typedef struct rib_qp_s		rib_qp_t;
typedef struct rib_hca_s	rib_hca_t;
87
88 /*
89 * Notification for RDMA_DONE is based on xid
90 */
91 struct rdma_done_list {
92 uint32_t xid; /* XID waiting for RDMA_DONE */
93 kcondvar_t rdma_done_cv; /* cv for RDMA_DONE */
94 struct rdma_done_list *next;
95 struct rdma_done_list *prev;
96 };
97
/*
 * State of the plugin.
 * ACCEPT = accepting new connections and requests
 * NO_ACCEPT = not accepting new connections and requests
 */
#define	ACCEPT		1
#define	NO_ACCEPT	2

/*
 * Send Wait states
 *
 * The negative value is parenthesized so the macro always expands to
 * a single primary expression, regardless of the surrounding operators.
 */
#define	SEND_WAIT	(-1)

/*
 * Reply states
 *
 * Parenthesized for the same reason as SEND_WAIT.
 */
#define	REPLY_WAIT	(-1)
115
/* Generic untyped pointer used by the plugin. */
typedef void		*rib_pvoid;

/* Handle type for memory-sync operations; an alias of rib_pvoid. */
typedef rib_pvoid	RIB_SYNCMEM_HANDLE;
118
119 /*
120 * IB buffer pool management structure
121 */
122
123 /*
124 * Buffer pool info
125 */
126 typedef struct {
127 kmutex_t buflock; /* lock for this structure */
128 caddr_t buf; /* pool address */
129 uint32_t bufhandle; /* rkey for this pool */
130 ulong_t bufsize; /* size of pool */
131 int rsize; /* size of each element */
132 int numelems; /* no. of elements allocated */
133 int buffree; /* no. of free elements */
134 void *buflist[1]; /* free elements in pool */
135 } bufpool_t;
136
137 typedef struct {
138 bufpool_t *bpool;
139 ibt_mr_hdl_t *mr_hdl;
140 #ifdef IB_FMR_SUP
141 ibt_ma_hdl_t *ma_hdl;
142 ibt_pmr_desc_t *pmr_desc;
143 #endif
144 ibt_mr_desc_t *mr_desc; /* vaddr, lkey, rkey */
145
146 } rib_bufpool_t;
147
/*
 * ATS related defines and structures.
 */
#define	ATS_AR_DATA_LEN		16	/* length of ATS AR data */
#define	IBD_NAME		"ibd"	/* name of the ibd driver */
#define	N_IBD_INSTANCES		4	/* NOTE(review): use not visible here */
154
155 typedef struct rpcib_ats_s {
156 int ras_inst;
157 ib_pkey_t ras_pkey;
158 ib_gid_t ras_port_gid;
159 sa_family_t ras_inet_type;
160 union {
161 struct sockaddr_in ras_sockaddr;
162 struct sockaddr_in6 ras_sockaddr6;
163 } ra_sin;
164 #define ras_sin ra_sin.ras_sockaddr
165 #define ras_sin6 ra_sin.ras_sockaddr6
166 } rpcib_ats_t;
167
168 typedef struct rpcib_ibd_insts_s {
169 int rib_ibd_alloc;
170 int rib_ibd_cnt;
171 rpcib_ats_t *rib_ats;
172 } rpcib_ibd_insts_t;
173
/*
 * Service types supported by RPCIB.
 * For now only NFS is supported.
 */
#define	NFS	1
#define	NLM	2
180
/*
 * Tracks consumer state (client or server).
 */
typedef enum {
	RIB_SERVER = 0,
	RIB_CLIENT = 1
} rib_mode_t;
188
189 /*
190 * CQ structure
191 */
192 struct rib_cq_s {
193 rib_hca_t *rib_hca;
194 ibt_cq_hdl_t rib_cq_hdl;
195 };
196
197 /*
198 * RPCIB plugin state
199 */
200 typedef struct rpcib_state {
201 ibt_clnt_hdl_t ibt_clnt_hdl;
202 uint32_t hca_count;
203 uint32_t nhca_inited;
204 ib_guid_t *hca_guids;
205 rib_hca_t *hcas;
206 int refcount;
207 kmutex_t open_hca_lock;
208 rib_hca_t *hca; /* the hca being used */
209 queue_t *q; /* up queue for a serv_type */
210 uint32_t service_type; /* NFS, NLM, etc */
211 void *private;
212 } rpcib_state_t;
213
214 /*
215 * Each registered service's data structure.
216 * Each HCA has a list of these structures, which are the registered
217 * services on this HCA.
218 */
219 typedef struct rib_service rib_service_t;
220 struct rib_service {
221 uint32_t srv_type; /* i.e, NFS, NLM, v4CBD */
222
223 /*
224 * service name, i.e, <IP>::NFS or <IP>::NLM. Since
225 * each type of service can be registered with many
226 * IP addrs(srv_name) and is running on all ports
227 * for all HCAs.
228 */
229 char *srv_name;
230
231 uint32_t srv_port; /* port on which registered */
232 ib_svc_id_t srv_id; /* from ibt_register call */
233 ibt_srv_hdl_t srv_hdl; /* from ibt_register call */
234 ibt_sbind_hdl_t *srv_sbind_hdl; /* from ibt_bind call */
235 ibt_ar_t srv_ar;
236
237 /*
238 * pointer to the next service registered on this
239 * particular HCA
240 */
241 rib_service_t *srv_next;
242 };
243
244 /*
245 * Connection lists
246 */
247 typedef struct {
248 krwlock_t conn_lock; /* list lock */
249 CONN *conn_hd; /* list head */
250 } rib_conn_list_t;
251
/*
 * Lifecycle state of an HCA.
 */
enum hca_state {
	HCA_INITED = 0,		/* hca in up and running state */
	HCA_DETACHED = 1	/* hca in detached state */
};
256
257 /*
258 * RPCIB per HCA structure
259 */
260 struct rib_hca_s {
261 ibt_clnt_hdl_t ibt_clnt_hdl;
262
263 /*
264 * per HCA.
265 */
266 ibt_hca_hdl_t hca_hdl; /* HCA handle */
267 ibt_hca_attr_t hca_attrs; /* HCA attributes */
268 ibt_pd_hdl_t pd_hdl;
269 ib_guid_t hca_guid;
270 uint32_t hca_nports;
271 ibt_hca_portinfo_t *hca_ports;
272 size_t hca_pinfosz;
273 enum hca_state state; /* state of HCA */
274 krwlock_t state_lock; /* protects state field */
275 bool_t inuse; /* indicates HCA usage */
276 kmutex_t inuse_lock; /* protects inuse field */
277 /*
278 * List of services registered on all ports available
279 * on this HCA. Only one consumer of KRPC can register
280 * its services at one time or tear them down at one
281 * time.
282 */
283 rib_service_t *service_list;
284 krwlock_t service_list_lock;
285
286 rib_service_t *ats_list; /* Service list for ATS */
287
288 rib_conn_list_t cl_conn_list; /* client conn list */
289 rib_conn_list_t srv_conn_list; /* server conn list */
290
291 rib_cq_t *clnt_scq;
292 rib_cq_t *clnt_rcq;
293 rib_cq_t *svc_scq;
294 rib_cq_t *svc_rcq;
295 kmutex_t cb_lock;
296 kcondvar_t cb_cv;
297
298 rib_bufpool_t *recv_pool; /* recv buf pool */
299 rib_bufpool_t *send_pool; /* send buf pool */
300
301 void *iblock; /* interrupt cookie */
302
303 #ifdef IB_FMR_SUP
304 ibt_fmr_pool_hdl_t fmr_pool;
305 #endif /* IB_FMR_SUP */
306
307 #ifdef SERVER_REG_CACHE
308 kmem_cache_t *server_side_cache; /* long reply pool */
309 avl_tree_t avl_tree;
310 kmutex_t avl_lock;
311 krwlock_t avl_rw_lock;
312 volatile bool_t avl_init;
313 #endif
314
315 };
316
317
318 /*
319 * Structure on wait state of a post send
320 */
321 struct send_wid {
322 #if defined (CLNT_INTERRUPT_COAL)
323 struct send_wid *forw;
324 struct send_wid *back;
325 #endif
326 uint32_t xid;
327 int cv_sig;
328 kmutex_t sendwait_lock;
329 kcondvar_t wait_cv;
330 uint_t status;
331 rib_qp_t *qp;
332 int nsbufs; /* # of send buffers posted */
333 uint64_t sbufaddr[DSEG_MAX]; /* posted send buffers */
334 caddr_t c;
335 caddr_t c1;
336 int l1;
337 caddr_t c2;
338 int l2;
339 int wl,rl;
340 };
341
342 /*
343 * Structure on reply descriptor for recv queue.
344 * Different from the above posting of a descriptor.
345 */
346 struct reply {
347 uint32_t xid;
348 uint_t status;
349 uint64_t vaddr_cq; /* buf addr from CQ */
350 uint_t bytes_xfer;
351 kcondvar_t wait_cv;
352 struct reply *next;
353 struct reply *prev;
354 };
355
356 struct svc_recv {
357 rib_qp_t *qp;
358 uint64_t vaddr;
359 uint_t bytes_xfer;
360 };
361
362 struct recv_wid {
363 uint32_t xid;
364 rib_qp_t *qp;
365 uint64_t addr; /* posted buf addr */
366 };
367
368 /*
369 * Per QP data structure
370 */
371 struct rib_qp_s {
372 rib_hca_t *hca;
373 rib_mode_t mode; /* RIB_SERVER or RIB_CLIENT */
374 CONN rdmaconn;
375 ibt_channel_hdl_t qp_hdl;
376 uint_t port_num;
377 ib_qpn_t qpn;
378 int chan_flags;
379 clock_t timeout;
380 ibt_rc_chan_query_attr_t qp_q_attrs;
381 rib_cq_t *send_cq; /* send CQ */
382 rib_cq_t *recv_cq; /* recv CQ */
383
384 /*
385 * Number of pre-posted rbufs
386 */
387 uint_t n_posted_rbufs;
388 kcondvar_t posted_rbufs_cv;
389 kmutex_t posted_rbufs_lock;
390
391 /*
392 * RPC reply
393 */
394 uint_t rep_list_size;
395 struct reply *replylist;
396 kmutex_t replylist_lock;
397
398 /*
399 * server only, RDMA_DONE
400 */
401 struct rdma_done_list *rdlist;
402 kmutex_t rdlist_lock;
403
404 kmutex_t cb_lock;
405 kcondvar_t cb_conn_cv;
406
407 caddr_t q; /* upstream queue */
408 struct send_wid wd;
409 };
410
/*
 * Conversions between a CONN and the rib_qp_t that embeds it:
 * ctoqp() reads the QP pointer stored in the CONN's c_private member;
 * qptoc() yields the address of the QP's embedded rdmaconn member.
 */
#define	ctoqp(c)	((rib_qp_t *)((c)->c_private))
#define	qptoc(qp)	((CONN *)&((qp)->rdmaconn))
413
/*
 * Timeout for various calls.
 * NOTE(review): the time unit is not stated in this header -- confirm
 * against the code that consumes these values.
 */
#define	CONN_WAIT_TIME		40
#define	SEND_WAIT_TIME		40	/* time for send completion */
#define	REPLY_WAIT_TIME		40	/* time to get reply from remote QP */
421
422 #ifdef __cplusplus
423 }
424 #endif
425
426 #endif /* !_IB_H */