Old ib.h
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #ifndef _IB_H
28 #define _IB_H
29
30 #pragma ident "@(#)ib.h 1.8 05/06/08 SMI"
31
32 /*
33 * ib.h, rpcib plugin interface.
34 */
35
36 #include <sys/types.h>
37 #include <sys/ddi.h>
38 #include <sys/sunddi.h>
39 #include <sys/conf.h>
40 #include <sys/stat.h>
41 #include <rpc/rpc.h>
42 #include <rpc/rpc_rdma.h>
43 #include <sys/ib/ibtl/ibti.h>
44
45 #ifdef __cplusplus
46 extern "C" {
47 #endif
48
/*
 * Buffer-pool limits, default queue sizes and pkey/security constants.
 */
#define	MAX_BUFS	256	/* max no. of buffers per pool */

/*
 * Tavor returns the next higher power of 2 CQ entries than the
 * requested size. For instance, if you request (2^12 - 1) CQ entries,
 * Tavor returns 2^12 entries. 4K CQ entries suffice. Hence, 4096 - 1.
 *
 * The replacement list is parenthesized so the macro remains a single
 * value when it is used inside a larger expression; without the
 * parentheses "2 * DEF_CQ_SIZE" would evaluate to 8191, not 8190.
 */
#define	DEF_CQ_SIZE	(4096 - 1)	/* default CQ size */

#define	DEF_SQ_SIZE	128	/* default SendQ size */
#define	DEF_RQ_SIZE	256	/* default RecvQ size */
#define	DSEG_MAX	2
#define	RQ_DSEG_MAX	1	/* default RQ data seg */
#define	IBSRM_HB	0x8000	/* high order bit of pkey */
#define	NFS_SEC_KEY0	0x6878	/* randomly selected NFS security key */
#define	NFS_SEC_KEY1	0x8679
65
/* max no. of refresh attempts on IBT_CM_CONN_STALE error */
#define	REFRESH_ATTEMPTS	3
68
/*
 * Forward declarations of the per-HCA, per-QP and CQ structures so
 * that they can refer to one another by pointer before their full
 * definitions appear later in this header.
 */
typedef struct rib_hca_s	rib_hca_t;
typedef struct rib_qp_s		rib_qp_t;
typedef struct rib_cq_s		rib_cq_t;
72
73 /*
74 * Notification for RDMA_DONE is based on xid
75 */
76 struct rdma_done_list {
77 uint32_t xid; /* XID waiting for RDMA_DONE */
78 kcondvar_t rdma_done_cv; /* cv for RDMA_DONE */
79 struct rdma_done_list *next;
80 struct rdma_done_list *prev;
81 };
82
/*
 * State of the plugin.
 *	ACCEPT		= accepting new connections and requests
 *	NO_ACCEPT	= not accepting new connections and requests
 */
#define	ACCEPT		1
#define	NO_ACCEPT	2
90
/*
 * Send Wait states
 *
 * The negative value is parenthesized so the macro always expands to
 * a single operand, whatever expression it is placed in.
 */
#define	SEND_WAIT	(-1)

/*
 * Reply states
 */
#define	REPLY_WAIT	(-1)
100
/*
 * Opaque pointer type, and the sync-memory handle type built on it.
 * RIB_SYNCMEM_HANDLE is simply an alias for a void pointer.
 */
typedef void		*rib_pvoid;
typedef rib_pvoid	RIB_SYNCMEM_HANDLE;
103
104 /*
105 * IB buffer pool management structure
106 */
107
108 /*
109 * Buffer pool info
110 */
111 typedef struct {
112 kmutex_t buflock; /* lock for this structure */
113 caddr_t buf; /* pool address */
114 uint32_t bufhandle; /* rkey for this pool */
115 ulong_t bufsize; /* size of pool */
116 int rsize; /* size of each element */
117 int numelems; /* no. of elements allocated */
118 int buffree; /* no. of free elements */
119 void *buflist[1]; /* free elements in pool */
120 } bufpool_t;
121
122 typedef struct {
123 bufpool_t *bpool;
124 ibt_mr_hdl_t *mr_hdl;
125 ibt_mr_desc_t *mr_desc; /* vaddr, lkey, rkey */
126 } rib_bufpool_t;
127
/*
 * ATS related defines and structures.
 */
#define	ATS_AR_DATA_LEN	16
#define	IBD_NAME	"ibd"
#define	N_IBD_INSTANCES	4
134
135 typedef struct rpcib_ats_s {
136 int ras_inst;
137 ib_pkey_t ras_pkey;
138 ib_gid_t ras_port_gid;
139 sa_family_t ras_inet_type;
140 union {
141 struct sockaddr_in ras_sockaddr;
142 struct sockaddr_in6 ras_sockaddr6;
143 } ra_sin;
144 #define ras_sin ra_sin.ras_sockaddr
145 #define ras_sin6 ra_sin.ras_sockaddr6
146 } rpcib_ats_t;
147
148 typedef struct rpcib_ibd_insts_s {
149 int rib_ibd_alloc;
150 int rib_ibd_cnt;
151 rpcib_ats_t *rib_ats;
152 } rpcib_ibd_insts_t;
153
/*
 * Service types supported by RPCIB
 * For now only NFS is supported.
 * (An identifier for NLM is defined as well.)
 */
#define	NFS	1
#define	NLM	2
160
/*
 * Tracks consumer state (client or server).
 * No explicit values are given, so RIB_SERVER is 0 and RIB_CLIENT is 1.
 */
typedef enum {
	RIB_SERVER,
	RIB_CLIENT
} rib_mode_t;
168
169 /*
170 * CQ structure
171 */
172 struct rib_cq_s {
173 rib_hca_t *rib_hca;
174 ibt_cq_hdl_t rib_cq_hdl;
175 };
176
177 /*
178 * RPCIB plugin state
179 */
180 typedef struct rpcib_state {
181 ibt_clnt_hdl_t ibt_clnt_hdl;
182 uint32_t hca_count;
183 uint32_t nhca_inited;
184 ib_guid_t *hca_guids;
185 rib_hca_t *hcas;
186 int refcount;
187 kmutex_t open_hca_lock;
188 rib_hca_t *hca; /* the hca being used */
189 queue_t *q; /* up queue for a serv_type */
190 uint32_t service_type; /* NFS, NLM, etc */
191 void *private;
192 } rpcib_state_t;
193
194 /*
195 * Each registered service's data structure.
196 * Each HCA has a list of these structures, which are the registered
197 * services on this HCA.
198 */
199 typedef struct rib_service rib_service_t;
200 struct rib_service {
201 uint32_t srv_type; /* i.e, NFS, NLM, v4CBD */
202
203 /*
204 * service name, i.e, <IP>::NFS or <IP>::NLM. Since
205 * each type of service can be registered with many
206 * IP addrs(srv_name) and is running on all ports
207 * for all HCAs.
208 */
209 char *srv_name;
210
211 uint32_t srv_port; /* port on which registered */
212 ib_svc_id_t srv_id; /* from ibt_register call */
213 ibt_srv_hdl_t srv_hdl; /* from ibt_register call */
214 ibt_sbind_hdl_t *srv_sbind_hdl; /* from ibt_bind call */
215 ibt_ar_t srv_ar;
216
217 /*
218 * pointer to the next service registered on this
219 * particular HCA
220 */
221 rib_service_t *srv_next;
222 };
223
224 /*
225 * Connection lists
226 */
227 typedef struct {
228 krwlock_t conn_lock; /* list lock */
229 CONN *conn_hd; /* list head */
230 } rib_conn_list_t;
231
/*
 * Life-cycle state of an HCA.
 */
enum hca_state {
	HCA_INITED,	/* hca in up and running state */
	HCA_DETACHED	/* hca in detached state */
};
236
237 /*
238 * RPCIB per HCA structure
239 */
240 struct rib_hca_s {
241 ibt_clnt_hdl_t ibt_clnt_hdl;
242
243 /*
244 * per HCA.
245 */
246 ibt_hca_hdl_t hca_hdl; /* HCA handle */
247 ibt_hca_attr_t hca_attrs; /* HCA attributes */
248 ibt_pd_hdl_t pd_hdl;
249 ib_guid_t hca_guid;
250 uint32_t hca_nports;
251 ibt_hca_portinfo_t *hca_ports;
252 size_t hca_pinfosz;
253 enum hca_state state; /* state of HCA */
254 krwlock_t state_lock; /* protects state field */
255 bool_t inuse; /* indicates HCA usage */
256 kmutex_t inuse_lock; /* protects inuse field */
257 /*
258 * List of services registered on all ports available
259 * on this HCA. Only one consumer of KRPC can register
260 * its services at one time or tear them down at one
261 * time.
262 */
263 rib_service_t *service_list;
264 krwlock_t service_list_lock;
265
266 rib_service_t *ats_list; /* Service list for ATS */
267
268 rib_conn_list_t cl_conn_list; /* client conn list */
269 rib_conn_list_t srv_conn_list; /* server conn list */
270
271 rib_cq_t *clnt_scq;
272 rib_cq_t *clnt_rcq;
273 rib_cq_t *svc_scq;
274 rib_cq_t *svc_rcq;
275 kmutex_t cb_lock;
276 kcondvar_t cb_cv;
277
278 rib_bufpool_t *recv_pool; /* recv buf pool */
279 rib_bufpool_t *send_pool; /* send buf pool */
280
281 void *iblock; /* interrupt cookie */
282 };
283
284
285 /*
286 * Structure on wait state of a post send
287 */
288 struct send_wid {
289 uint32_t xid;
290 int cv_sig;
291 kmutex_t sendwait_lock;
292 kcondvar_t wait_cv;
293 uint_t status;
294 rib_qp_t *qp;
295 int nsbufs; /* # of send buffers posted */
296 uint64_t sbufaddr[DSEG_MAX]; /* posted send buffers */
297 };
298
299 /*
300 * Structure on reply descriptor for recv queue.
301 * Different from the above posting of a descriptor.
302 */
303 struct reply {
304 uint32_t xid;
305 uint_t status;
306 uint64_t vaddr_cq; /* buf addr from CQ */
307 uint_t bytes_xfer;
308 kcondvar_t wait_cv;
309 struct reply *next;
310 struct reply *prev;
311 };
312
313 struct svc_recv {
314 rib_qp_t *qp;
315 uint64_t vaddr;
316 uint_t bytes_xfer;
317 };
318
319 struct recv_wid {
320 uint32_t xid;
321 rib_qp_t *qp;
322 uint64_t addr; /* posted buf addr */
323 };
324
325 /*
326 * Per QP data structure
327 */
328 struct rib_qp_s {
329 rib_hca_t *hca;
330 rib_mode_t mode; /* RIB_SERVER or RIB_CLIENT */
331 CONN rdmaconn;
332 ibt_channel_hdl_t qp_hdl;
333 uint_t port_num;
334 ib_qpn_t qpn;
335 int chan_flags;
336 clock_t timeout;
337 ibt_rc_chan_query_attr_t qp_q_attrs;
338 rib_cq_t *send_cq; /* send CQ */
339 rib_cq_t *recv_cq; /* recv CQ */
340
341 /*
342 * Number of pre-posted rbufs
343 */
344 uint_t n_posted_rbufs;
345 kcondvar_t posted_rbufs_cv;
346 kmutex_t posted_rbufs_lock;
347
348 /*
349 * RPC reply
350 */
351 uint_t rep_list_size;
352 struct reply *replylist;
353 kmutex_t replylist_lock;
354
355 /*
356 * server only, RDMA_DONE
357 */
358 struct rdma_done_list *rdlist;
359 kmutex_t rdlist_lock;
360
361 kmutex_t cb_lock;
362 kcondvar_t cb_conn_cv;
363
364 caddr_t q; /* upstream queue */
365 };
366
/*
 * Conversions between a CONN and the rib_qp_t associated with it.
 *
 *	ctoqp(conn)	casts conn->c_private to an rib_qp_t pointer.
 *	qptoc(rqp)	yields the address of the CONN embedded in *rqp
 *			(its rdmaconn member).
 */
#define	ctoqp(conn)	((rib_qp_t *)((conn)->c_private))
#define	qptoc(rqp)	((CONN *)&((rqp)->rdmaconn))
369
/*
 * Timeout for various calls
 * NOTE(review): the unit of these values is not stated here --
 * confirm against the code that consumes them.
 */
#define	CONN_WAIT_TIME	40
#define	SEND_WAIT_TIME	40	/* time for send completion */

#define	REPLY_WAIT_TIME	40	/* time to get reply from remote QP */
377
378 #ifdef __cplusplus
379 }
380 #endif
381
382 #endif /* !_IB_H */